## Warning: package 'tibble' was built under R version 3.6.3
## Warning: replacing previous import 'shiny::dataTableOutput' by
## 'DT::dataTableOutput' when loading 'ancom.R'
## Warning: replacing previous import 'shiny::renderDataTable' by
## 'DT::renderDataTable' when loading 'ancom.R'
## Warning: package 'vegan' was built under R version 3.6.3
## Warning: package 'DESeq2' was built under R version 3.6.3
## Warning: package 'phyloseq' was built under R version 3.6.3
##### If you have the M3 google drive and bitbucket repo on your local machine,
##### you only need to change the two path parameters below.
# M3-shared google drive location
#M3folder_loc_for_ps='/Google Drive/M3-shared/V4/Data/200312_ASVdata8_updateAsteria/ps_notnorm_age_filter_complete_family.rds'
# Load the non-normalized, age-filtered phyloseq object for analysis
# (adjust this path for your own machine).
ps_not_norm_comp <- readRDS("~/M3_Datasets/ps_not_norm_age_filter_complete_family.rds")
## Output data location (subject to change)
#output_data=paste0(M3folder_loc, 'Data/V4/180808_ASVdata4/OutputData_Agefiltered/')
output_data <- "~/M3_Datasets/"
# Analysis-wide thresholds ----------------------------------------------
# minimum post-DADA2 read count for a sample to enter the analysis
min_postDD <- 20000
# DESeq2 significance cutoff
deseq_cut <- 0.05
# metagenomeSeq significance cutoff
mtgseq_cut <- 0.05
# chi-squared test cutoff (diet questionnaire significance)
chisq_cut <- 0.05
# PERMANOVA p-value and R2 cutoffs used for visualization
permanova_pcut <- 0.05
permanova_cut <- 0.1
# chisquared test function
run_chisq_test <- function(ps, metacol){
# ps: phyloseq object
# metacol: metadata column name to test
metaDF <- data.frame(sample_data(ps))
# remove NAs, for some reason, some NA are recorded as a text!
submetaDF=metaDF[!is.na(metaDF[, metacol]), ]
submetaDF=submetaDF[submetaDF[, metacol]!='NA', ]
submetaDF=submetaDF[submetaDF[, metacol]!='', ] # also remove blank
# chisquared test
chisq_res=chisq.test(table(submetaDF[, metacol], submetaDF[, 'phenotype']))
# extract results
resDT=data.table(chisq_res$observed)
# dcast for printing
resDT <- data.table(dcast(resDT, V1 ~ V2, value.var='N'))
resDT <- resDT[, testvar:=metacol]
resDT <- resDT[, chisq_p:=chisq_res$p.value]
return(resDT[, list(testvar, category=V1, A, N, chisq_p)])
}
# composition plot function
plot_composition <- function(chisq_resDT, var_name){
# chisq_resDT: 4 columns. testvar, category, A, N, chisq_p
plotDT=melt(chisq_resDT, id.vars=c('testvar', 'category', 'chisq_p'))
p=ggplot(data=plotDT[testvar==var_name], aes(x=variable, y=value, fill=category))+
geom_bar(stat="identity")+
xlab('')+ylab('Number of sample')+
ggtitle(var_name)+
theme_minimal()+
theme(legend.title=element_blank(), legend.position="bottom", axis.text.x=element_text(vjust=1, hjust=1))+
scale_fill_manual(values=sgColorPalette)
print(p)
}
#Remove breastfed participants and their whole families.
breast_fed <- c("089_A","054_N", "158_N" )
b_fed <- unique(ps_not_norm_comp@sam_data$Family.group.ID[which(ps_not_norm_comp@sam_data$Host.Name %in% breast_fed)])
# Remove every affected family in a single vectorized prune_samples() call
# with %in% (same result as the original per-family loop, one pass).
ps_not_norm_comp <- prune_samples(!(ps_not_norm_comp@sam_data$Family.group.ID %in% b_fed), ps_not_norm_comp)
#fixing the mapping file for stats by adding categorical vs non catergorical
metadata_ok<-sample_data(ps_not_norm_comp)
# Round-trip through csv to get a plain data.frame copy of the sample data.
write.csv(metadata_ok, "sam_data.csv")
map<-read.csv("sam_data.csv")
meta_cat <- read.csv("updated_metacategories.csv")
# Coerce each PERMANOVA-eligible variable to its declared type.
for (i in meta_cat$varname.16S.V4[which(meta_cat$permanova != FALSE)]){
if (meta_cat$permanova[which(meta_cat$varname.16S.V4 == i)] == "Categorical") {
map[,i] <- as.factor(map[,i])
} else {
# BUGFIX: read.csv (R < 4.0) returns factors for text columns, and
# as.numeric() on a factor yields the internal level codes, not the
# recorded values. Convert through character first so the actual
# numbers are preserved.
map[,i] <- as.numeric(as.character(map[,i]))
}
}
# Recode free-text questionnaire frequency answers into numeric scores so
# they can enter rank/slope tests, then de-duplicate rows by biospecimen
# barcode and use the barcode as rowname.
# NOTE(review): subsetting a dictionary list by answer text drops keys that
# are absent (unlist() removes NULL entries), while tmp[!is.na(freqs)]
# selects every row, because handleNAs() turns real NAs into the *string*
# "NA" so is.na(freqs) is all-FALSE. The printed length checks inside the
# loops exist to catch this; an unexpected answer string would silently
# misalign the recoded values -- confirm if the questionnaire wording
# ever changes.
makeFieldsNumeric <- function(map){
# Normalize missing answers: blanks and real NAs both become the text "NA".
handleNAs <- function(vec){
vec[vec == ""] <- "NA"
vec[is.na(vec)] <- "NA"
return(vec)
}
# Stool frequency: clamp the two open-ended categories to numbers.
map$Stool.frequency <- handleNAs(as.character(map$Stool.frequency))
map$Stool.frequency[as.character(map$Stool.frequency) == "Less than 1"] = 0
map$Stool.frequency[as.character(map$Stool.frequency) == "5 or more"] = 5
# Longitudinal dairy: a stray numeric 5 is mapped back to its text level so
# the freq_dict_1 recoding below can handle it.
map$Dairy..consumption.frequency...longitudinal.[map$Dairy..consumption.frequency...longitudinal. == 5] <- "3-4 meals per week"
#map$LR2[map$LR2 == "1 (ASD)"] = 1
#map$LR2[map$LR2 == "0 (non-ASD"] = 0
# Never..Daily ordinal scale; both observed spellings of "weekly" and
# "Several time(s) weekly" map to the same score.
freq_dict_2 <- list("Never" = 0, "Rarely" = 1, "Occasionally" = 2, "Regularly" = 3, "Weekly" = 4, "weekly" = 4,
"Several time weekly" = 5, "Several times weekly" = 5, "Daily" = 6, "NA" = NA)
dict_2_items <- c("Whole.grain..consumption.frequency.", "Fermented.vegetable..consumption.frequency.", "Dairy..consumption.frequency.", "Fruit..consumption.frequency.", "Meals.prepared.at.home..consumption.frequency.", "Ready.to.eat.meals..consumption.frequency.", "Red.meat..consumption.frequency.", "Olive.oil.used.in.cooking..M3.", "Seafood..consumption.frequency.", "Sweetened.drink..consumption.frequency.", "Vegetable..consumption.frequency.",
"Restaurant.prepared.meals..consumption.frequency.", "Sugary.food..consumption.frequency.", "Probiotic..consumption.frequency.", "Vitamin.B.complex.supplement..consumption.frequency.", "Vitamin.D..consumption.frequency.")
# Recode each Never..Daily item; the printed lengths let a reader verify
# that the dictionary lookup did not drop any answers.
for(item in dict_2_items){
print(item)
tmp <- rep(NA, nrow(map))
freqs <- handleNAs(map[,item])
numeric_rep <- unlist(freq_dict_2[freqs])
print(paste("Numeric rep length: ", length(numeric_rep)))
print(sum(!is.na(freqs)))
tmp[!is.na(freqs)] <- as.numeric(numeric_rep)
map[ , item] <- tmp
}
# Meals-per-week ordinal scale for the longitudinal questionnaire items.
freq_dict_1 <- list("Never or less than once per week" = 0, "3-4 meals per week" = 1, "5" = 2, "7-10 meals per week" = 3, "Almost every meal" = 4, "NA" = NA)
dict_1_items <- c("Starchy.food..consumption.frequency...longitudinal.", "Meats.and.seafood..consumption.frequency...longitudinal.", "Bread..consumption.frequency...longitudinal.", "Dairy..consumption.frequency...longitudinal.", "Dietary.fat.and.oil..consumption.frequency...longitudinal.", "Vegetable..consumption.frequency...longitudinal.",
"Fruit..consumption.frequency...longitudinal.")
for(item in dict_1_items){
print(item)
tmp <- rep(NA, nrow(map))
freqs <- handleNAs(map[ , item])
numeric_rep <- unlist(freq_dict_1[freqs])
print(paste("Numeric rep length: ", length(numeric_rep)))
print(sum(!is.na(freqs)))
tmp[!is.na(freqs)] <- as.numeric(numeric_rep)
map[ , item] <- tmp
}
#may add more, but these variable only apply to phenotype for autism
# Language/communication ability scales (ASD-phenotype questions only).
freq_dict_2 <- list("Able to speak fluently" = 3,"Phrase speech"=2, "Single word speech"=1, "Little to no speech" = 0, "Able to have conversation" = 3, "Limited conversation ability" = 2, "Difficulty with conversation" = 1, "Cannot have a conversation" = 0,"Understands about half of words" = 1, "Understands few or no words"= 0, "Understands many words" = 2, "Understands most words"= 3, "Understands nearly all words" = 4 ,"NA" = NA)
dict_2_items <- c("Language.ability.and.use", "Conversation.ability", "Understands.speech")
for(item in dict_2_items){
print(item)
tmp <- rep(NA, nrow(map))
freqs <- handleNAs(map[ , item])
numeric_rep <- unlist(freq_dict_2[freqs])
print(paste("Numeric rep length: ", length(numeric_rep)))
print(sum(!is.na(freqs)))
tmp[!is.na(freqs)] <- as.numeric(numeric_rep)
map[ , item] <- tmp
}
# One row per biospecimen; barcode becomes the rowname.
map <- map[!duplicated(map$Biospecimen.Barcode), ]
rownames(map) <- map$Biospecimen.Barcode
return(map)
}
# Apply the questionnaire recoding to the working metadata table.
map<-makeFieldsNumeric(map)
## [1] "Whole.grain..consumption.frequency."
## [1] "Numeric rep length: 447"
## [1] 447
## [1] "Fermented.vegetable..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Dairy..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Fruit..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Meals.prepared.at.home..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Ready.to.eat.meals..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Red.meat..consumption.frequency."
## [1] "Numeric rep length: 78"
## [1] 78
## [1] "Olive.oil.used.in.cooking..M3."
## [1] "Numeric rep length: 447"
## [1] 447
## [1] "Seafood..consumption.frequency."
## [1] "Numeric rep length: 447"
## [1] 447
## [1] "Sweetened.drink..consumption.frequency."
## [1] "Numeric rep length: 447"
## [1] 447
## [1] "Vegetable..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Restaurant.prepared.meals..consumption.frequency."
## [1] "Numeric rep length: 447"
## [1] 447
## [1] "Sugary.food..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Probiotic..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Vitamin.B.complex.supplement..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Vitamin.D..consumption.frequency."
## [1] "Numeric rep length: 450"
## [1] 450
## [1] "Starchy.food..consumption.frequency...longitudinal."
## [1] "Numeric rep length: 421"
## [1] 421
## [1] "Meats.and.seafood..consumption.frequency...longitudinal."
## [1] "Numeric rep length: 421"
## [1] 421
## [1] "Bread..consumption.frequency...longitudinal."
## [1] "Numeric rep length: 421"
## [1] 421
## [1] "Dairy..consumption.frequency...longitudinal."
## [1] "Numeric rep length: 421"
## [1] 421
## [1] "Dietary.fat.and.oil..consumption.frequency...longitudinal."
## [1] "Numeric rep length: 421"
## [1] 421
## [1] "Vegetable..consumption.frequency...longitudinal."
## [1] "Numeric rep length: 421"
## [1] 421
## [1] "Fruit..consumption.frequency...longitudinal."
## [1] "Numeric rep length: 421"
## [1] 421
## [1] "Language.ability.and.use"
## [1] "Numeric rep length: 225"
## [1] 225
## [1] "Conversation.ability"
## [1] "Numeric rep length: 225"
## [1] 225
## [1] "Understands.speech"
## [1] "Numeric rep length: 225"
## [1] 225
# Factors with many levels (>= 18) behave like free text rather than real
# categories: demote them back to character before reattaching the metadata.
map_levels <- sapply(map, levels)
map_levelscount <- sapply(map_levels, length)
mapnotfac <- names(map_levelscount)[map_levelscount >= 18]
for (col in mapnotfac){
map[, col] <- as.character(map[, col])
}
sample_data(ps_not_norm_comp) <- map
#remove samples that are too young (< 24 months) and their paired siblings
tooyoung <- unique(ps_not_norm_comp@sam_data$Family.group.ID[which(ps_not_norm_comp@sam_data$Age..months. < 24)])
# BUGFIX: the original compared Family.group.ID with `!= unique(tooyoung)`,
# which is only correct when exactly one family is affected (family 65 in
# this dataset). %in% removes every too-young family regardless of count.
ps_not_norm_comp <- prune_samples(!(ps_not_norm_comp@sam_data$Family.group.ID %in% tooyoung), ps_not_norm_comp)
#List of individuals that were reported w/ autism, but was not classified as such through MARA and/or video classifier
pheno_contrad <- read.csv("phenotype_contradictions.csv")
contradicting <- unique(ps_not_norm_comp@sam_data$Family.group.ID[which(ps_not_norm_comp@sam_data$Host.Name %in% as.character(pheno_contrad$host_name))])
# Remove each contradicting family in one vectorized prune (replaces the
# per-family loop; same result).
ps_not_norm_comp <- prune_samples(!(ps_not_norm_comp@sam_data$Family.group.ID %in% contradicting), ps_not_norm_comp)
#round age to whole years
ps_not_norm_comp@sam_data$Age..years. <- round(ps_not_norm_comp@sam_data$Age..years.)
metadata_ok <- ps_not_norm_comp@sam_data
#now let's only run the categorical values for chi square and remove the first column which are not metadata
num_cat <- names(Filter(is.numeric, metadata_ok))
fac_cat <- names(Filter(is.factor, metadata_ok))
#removing the first 13 columns, since it's not metadata and the last one which is phenotype
fac_cat <- fac_cat[-c(1:13, length(fac_cat))]
# Variables excluded from chi-squared testing: asked only of children with
# ASD, having a single informative level plus NA, or present in only one
# phenotype. Defined ONCE so the two exclusion lists below cannot drift
# apart (the original duplicated this long vector).
chisq_excl <- c("Behavior.video.submitted..M3.","Language.ability.and.use","Conversation.ability","Understands.speech","Plays.imaginatively.when.alone","Plays.imaginatively.with.others","Plays.in.a.group.with.others","Eye.contact.finding","Childhood.behavioral.development.finding","Repetitive.motion","Picks.up.objects.to.show.to.others","Sleep.pattern.finding","Response.to.typical.sounds","Self.injurious.behavior.finding","Gastrointestinal.problems..M3.", "Imitation.behavior", "Other.stool.sample.collection.method.explained..M3.", "Flu.shot.in.the.last..MFlu.shot.in.the.last..M3.", "Pica.disease", "Additional.info.affecting.microbiome..M3.", "Dietary.restrictions.details..M3.", "Pet.bird")
# !(x %in% excl) instead of -which(...): the latter silently returns an
# empty vector when nothing matches.
fac_cat <- fac_cat[!(fac_cat %in% chisq_excl)]
#Also remove the ones with only one factor (no chi-square possible)
#now running the chisquare on all categorical values
chisquare_p.val=c()
names_chisquare_p.val=c()
all_chisquare=list()
# Candidate variables: more than one factor level, minus identifier columns
# (first 9), minus the exclusions plus the outcome variables themselves.
chi_list <- names(map_levelscount)[map_levelscount > 1]
chi_list <- chi_list[-c(1:9)]
chi_list <- chi_list[!(chi_list %in% c(chisq_excl, "Host.disease.status", "phenotype"))]
# Run the chi-squared test for every candidate categorical variable.
# Result containers are preallocated (no growing vectors inside the loop)
# and the loop uses seq_along() so an empty chi_list is handled safely.
chisquare_p.val <- numeric(length(chi_list))
all_chisquare <- vector("list", length(chi_list))
for (i in seq_along(chi_list)){
tmp <- run_chisq_test(ps_not_norm_comp, chi_list[i])
# one p-value per variable (min() is a no-op kept from the original)
chisquare_p.val[i] <- min(tmp$chisq_p)
all_chisquare[[i]] <- tmp
}
names(chisquare_p.val) <- chi_list
names(all_chisquare) <- chi_list
# Multiple-testing correction (Holm, the p.adjust default), then keep the
# significant variables.
chisquare_p.val <- p.adjust(chisquare_p.val)
chisquare_p.val <- chisquare_p.val[chisquare_p.val < 0.05]
length(chisquare_p.val) # 18 significant after correction
## [1] 18
# Print the adjusted p-values of the significant variables.
chisquare_p.val
## Functional.bowel.finding Specific.food.allergy
## 1.144522e-07 4.218111e-02
## Dietary.regime GI.symptoms.within.3.months..M3.
## 6.019477e-03 2.667426e-14
## Biological.sex GI.issues.this.week..M3.
## 2.508864e-07 2.201005e-10
## Non.celiac.gluten.sensitivity Lactose.intolerance
## 7.552762e-05 8.750901e-03
## Dietary.restrictions..M3. Dietary.supplement
## 1.272811e-07 1.179068e-08
## LR6.prediction..M3. LR10.prediction..M3.
## 3.846973e-15 1.243390e-11
## LR5.prediction..M3. Toilet.trained
## 4.648228e-16 1.012506e-02
## Other.symptoms.this.week..M3. Recent.anxiety..caretaker.reported.
## 2.477139e-02 7.475532e-04
## Toilet.cover..M3. Most.recent.GI.episode.symptoms..M3.
## 1.027156e-03 1.604970e-04
#visualisation of the results: keep only the significant variables
all_chisquare <- all_chisquare[names(all_chisquare) %in% names(chisquare_p.val)]
#save the corrected p-values into a csv
write.csv(format(chisquare_p.val, digits=2), file=paste0(output_data,"Xsqr_05.csv"), quote=FALSE)
# plot one example
# BUGFIX: use [[ ]] to pass the first result table itself;
# all_chisquare[1] returns a length-1 *list*, which is not the data.table
# that plot_composition() melts and subsets.
plot_composition(all_chisquare[[1]], names(all_chisquare)[1])
# Rebuild a plain data.frame copy of the (pruned) metadata for slope tests.
write.csv(metadata_ok, "sam_data2.csv")
map <- read.csv("sam_data2.csv")
# Numeric phenotype: A (ASD) -> 1, N (non-ASD) -> 0.
map$phenotype_num <- gsub("A", "1", map$phenotype)
map$phenotype_num <- as.numeric(gsub("N", "0", map$phenotype_num))
# Candidate variables: all numeric columns minus identifiers (first 4) and
# ASD-only / score columns.
map_num <- sapply(map, is.numeric)
num_cat <- colnames(map)[map_num]
num_cat <- num_cat[-c(1:4)]
num_cat <- num_cat[!(num_cat %in% c("Language.ability.and.use", "Conversation.ability", "Understands.speech", "Mobile.Autism.Risk.Assessment.Score", "phenotype_num"))]
# BUGFIX: the original tested the first variable WITHOUT `method = 0` and
# all later ones WITH it, mixing two test methods in one results table.
# Every variable now uses method = 0. Rows are collected in a list and
# bound once instead of rbind-ing inside the loop.
slope_rows <- vector("list", length(num_cat))
for (i in seq_along(num_cat)){
tmp <- slope.test(map[, num_cat[i]], map[, "phenotype_num"], method = 0)
slope_rows[[i]] <- tibble(var = num_cat[i], p_val = tmp$p)
}
slopetesttab <- do.call(rbind, slope_rows)
# Multiple-testing correction and significant variable names.
slope_p.val <- p.adjust(slopetesttab$p_val)
slopesig_p.val <- slopetesttab$var[slope_p.val < 0.05]
slopesig_p.val
## [1] "Number.of.roommates"
## [2] "Number.of.pet.dogs"
## [3] "Number.of.pet.cats"
## [4] "Number.of.pet.fish"
## [5] "Number.of.pet.birds"
## [6] "Number.of.pet.horses"
## [7] "Number.of.pet.reptiles"
## [8] "Number.of.small.pet.herbivores"
## [9] "Number.of.small.pet.rodents"
## [10] "Minimum.time.since.antibiotics"
## [11] "Stool.frequency"
## [12] "Whole.grain..consumption.frequency."
## [13] "Fermented.vegetable..consumption.frequency."
## [14] "Dairy..consumption.frequency."
## [15] "Fruit..consumption.frequency."
## [16] "Meals.prepared.at.home..consumption.frequency."
## [17] "Ready.to.eat.meals..consumption.frequency."
## [18] "Olive.oil.used.in.cooking..M3."
## [19] "Seafood..consumption.frequency."
## [20] "Sweetened.drink..consumption.frequency."
## [21] "Vegetable..consumption.frequency."
## [22] "Restaurant.prepared.meals..consumption.frequency."
## [23] "Sugary.food..consumption.frequency."
## [24] "Probiotic..consumption.frequency."
## [25] "Vitamin.B.complex.supplement..consumption.frequency."
## [26] "Vitamin.D..consumption.frequency."
## [27] "LR6.probability.not.ASD..M3."
## [28] "LR6.probability.ASD..M3."
## [29] "LR10.probability.not.ASD..M3."
## [30] "LR10.probability.ASD..M3."
## [31] "LR5.probability.not.ASD..M3."
## [32] "LR5.probability.ASD..M3."
## [33] "Age..years."
## [34] "Starchy.food..consumption.frequency...longitudinal."
## [35] "Meats.and.seafood..consumption.frequency...longitudinal."
## [36] "Dietary.fat.and.oil..consumption.frequency...longitudinal."
## [37] "Vegetable..consumption.frequency...longitudinal."
## [38] "Fruit..consumption.frequency...longitudinal."
## [39] "Red.meat..consumption.frequency."
# 39 out of 47 were significant, some due to just a lot of zeros perhaps
# print number table
# Raw counts of racial group by biological sex for the retained samples.
table(sample_data(ps_not_norm_comp)$Racial.group, sample_data(ps_not_norm_comp)$Biological.sex)
##
## Female Male
## Asian race 12 30
## Asian race & Middle Eastern race 3 3
## Asian race & Unknown racial group 3 3
## Caucasian 81 165
## Caucasian & Hispanic 0 12
## Caucasian & Indian (subcontinent) race 0 6
## Hispanic 6 18
## Hispanic & African race 6 0
## Indian (subcontinent) race 3 3
## Indian race 6 6
## Unknown racial group 0 12
# run chisquared test
race=run_chisq_test(ps_not_norm_comp, 'Racial.group')
# print results
pander(race)
| testvar | category | A | N | chisq_p |
|---|---|---|---|---|
| Racial.group | Asian race | 21 | 21 | 1 |
| Racial.group | Asian race & Middle Eastern race | 3 | 3 | 1 |
| Racial.group | Asian race & Unknown racial group | 3 | 3 | 1 |
| Racial.group | Caucasian | 123 | 123 | 1 |
| Racial.group | Caucasian & Hispanic | 6 | 6 | 1 |
| Racial.group | Caucasian & Indian (subcontinent) race | 3 | 3 | 1 |
| Racial.group | Hispanic | 12 | 12 | 1 |
| Racial.group | Hispanic & African race | 3 | 3 | 1 |
| Racial.group | Indian (subcontinent) race | 3 | 3 | 1 |
| Racial.group | Indian race | 6 | 6 | 1 |
| Racial.group | Unknown racial group | 6 | 6 | 1 |
# Composition plot for racial group.
plot_composition(race, 'Racial.group')
# Percentage table: column-wise proportions of the A/N counts.
race_prop <- prop.table(as.matrix(race[, .(A, N)]), margin=2) * 100
row.names(race_prop) <- race$category
pander(race_prop)
| Â | A | N |
|---|---|---|
| Asian race | 11.11 | 11.11 |
| Asian race & Middle Eastern race | 1.587 | 1.587 |
| Asian race & Unknown racial group | 1.587 | 1.587 |
| Caucasian | 65.08 | 65.08 |
| Caucasian & Hispanic | 3.175 | 3.175 |
| Caucasian & Indian (subcontinent) race | 1.587 | 1.587 |
| Hispanic | 6.349 | 6.349 |
| Hispanic & African race | 1.587 | 1.587 |
| Indian (subcontinent) race | 1.587 | 1.587 |
| Indian race | 3.175 | 3.175 |
| Unknown racial group | 3.175 | 3.175 |
# Save the racial-group percentage table.
write.csv(race_prop, file=paste0(output_data, 'Race.csv'))
# Age in months must be numeric for plotting and testing.
sample_data(ps_not_norm_comp)$Age..months. <- as.numeric(sample_data(ps_not_norm_comp)$Age..months.)
# Age distribution by phenotype: boxplot with jittered points.
ggplot(data=sample_data(ps_not_norm_comp), aes(x=phenotype, y=Age..months., fill=phenotype)) +
  geom_boxplot(width=0.7, outlier.colour='white') +
  geom_jitter(size=1, position=position_jitter(width=0.1)) +
  xlab('') +
  ylab('Age (months)') +
  scale_fill_manual(values=sgColorPalette) +
  theme_minimal()
# run tests to check significance
shapiro.test(sample_data(ps_not_norm_comp)$Age..months.) # not normal: a ranking (non-parametric) test is needed
##
## Shapiro-Wilk normality test
##
## data: sample_data(ps_not_norm_comp)$Age..months.
## W = 0.96494, p-value = 7.196e-08
# Age differs by phenotype (Wilcoxon rank-sum, since age is not normal).
# NOTE: the original passed var.equal=FALSE, a t.test() argument that
# wilcox.test() silently absorbs via `...` and ignores; it has been dropped.
wilcox.test(Age..months. ~ phenotype, data=data.frame(sample_data(ps_not_norm_comp)))
##
## Wilcoxon rank sum test with continuity correction
##
## data: Age..months. by phenotype
## W = 20889, p-value = 0.004351
## alternative hypothesis: true location shift is not equal to 0
# Wilcoxon rank-sum test of every numeric variable against phenotype.
# Preallocated result vector and seq_along(); the ignored var.equal
# argument (a t.test() option silently swallowed by wilcox.test's `...`)
# has been dropped.
wilcox_pval <- numeric(length(num_cat))
for (i in seq_along(num_cat)){
tmp <- wilcox.test(map[[num_cat[i]]] ~ map$phenotype)
wilcox_pval[i] <- tmp$p.value
}
names(wilcox_pval) <- num_cat
# Holm correction, then keep significant variables. which() drops the NA
# entries that NA p-values would otherwise inject as <NA> rows into the
# printout (visible in the original output).
wilcox_pval <- p.adjust(wilcox_pval)
wilcox_pval[which(wilcox_pval < 0.05)] #LRprobabilities, veg, bread, dairy, fruit, sweet drink
## <NA>
## NA
## <NA>
## NA
## <NA>
## NA
## Sweetened.drink..consumption.frequency.
## 1.913695e-02
## Vegetable..consumption.frequency.
## 9.347546e-03
## LR6.probability.not.ASD..M3.
## 3.528011e-17
## LR6.probability.ASD..M3.
## 3.528011e-17
## LR10.probability.not.ASD..M3.
## 3.546779e-18
## LR10.probability.ASD..M3.
## 3.546779e-18
## LR5.probability.not.ASD..M3.
## 2.282773e-17
## LR5.probability.ASD..M3.
## 2.282773e-17
## Bread..consumption.frequency...longitudinal.
## 4.192429e-05
## Dairy..consumption.frequency...longitudinal.
## 6.348948e-05
## Vegetable..consumption.frequency...longitudinal.
## 4.310018e-03
## Fruit..consumption.frequency...longitudinal.
## 9.347546e-03
Dietary variance amongst ASD patients will also be assessed. Based on preliminary analyses, we expect that ASD participants, collectively, will exhibit a minimal degree of dietary variance.
# Read the metadata category file.
#meta_cat=fread(paste0(M3folder_loc, meta_cat_fp), header=TRUE)
# Created from the shared M3 google drive sheet: first sheet, last
# incomplete column removed, then converted to csv.
meta_cat <- read.csv("updated_metacategories.csv")
colnames(meta_cat)[1] <- "varname"
# Keep only the diet-related questions.
diet_vars <- meta_cat$varname[which(meta_cat$diet==TRUE)]
diet_info <- metadata_ok[, colnames(metadata_ok) %in% diet_vars]
# Drop columns that are all-NA or have a single factor level
# (chi-squared needs at least two levels).
diet_col_levels <- sapply(diet_info, levels)
dietcol_levelscount <- sapply(diet_col_levels, length)
# The few numeric dietary variables are mostly NA, so they are omitted too.
#diet_info[,which(sapply(diet_info, class) == "numeric")]
diet_info <- diet_info[, which(dietcol_levelscount >= 2)]
dietq_col <- colnames(diet_info)
# For each dietary variable, test whether the A vs N composition differs
# (we hypothesized variance in ASD is the same as NT). Results are
# collected in a preallocated list and bound ONCE with rbindlist, instead
# of re-binding the growing table on every iteration.
diet_res_list <- vector("list", length(dietq_col))
for (k in seq_along(dietq_col)){
diet_res_list[[k]] <- run_chisq_test(ps_not_norm_comp, dietq_col[k])
}
master_resDT <- rbindlist(diet_res_list)
# variables tested
unique(master_resDT$testvar)
## [1] "Dietary.regime" "Meat..consumption.frequency."
## [3] "Multivitamin" "Dietary.supplement"
# order by significance (ascending chi-squared p-value)
master_resDT <- master_resDT[order(chisq_p)]
# interactive table of all results
datatable(master_resDT)
# write csv files: full proportion table, and one p-value per variable
write.csv(master_resDT, file=paste0(output_data, 'Dietary_var_all_proportion.csv'))
write.csv(unique(master_resDT[, list(testvar, chisq_p)]), file=paste0(output_data, 'Dietary_var_chisq_test.csv'))
# plot top 3 most significant vars
plot_diet=master_resDT[testvar %in% unique(master_resDT[, list(testvar, chisq_p)])$testvar[1:3]]
for(i in unique(plot_diet$testvar)){
plot_composition(plot_diet, i)
}
# Create the output folder for normalized phyloseq objects; suppress the
# warning dir.create() emits when the folder already exists (re-runs).
dir.create(paste0(output_data, 'Normalized/'), showWarnings = FALSE)
#Filtering of the prevalence:
# Keep only taxa with a nonzero count in more than percentSamplesPresentIn
# (a fraction, e.g. 0.03) of the samples. The function name keeps the
# original "Prevolence" spelling because the script calls it below.
filterTaxaByPrevolence <- function(ps, percentSamplesPresentIn){
prevalenceThreshold <- percentSamplesPresentIn * nsamples(ps)
# rowSums on the logical matrix replaces the per-row apply(): identical
# prevalence counts (named logical vector), one vectorized pass.
toKeep <- rowSums(data.frame(otu_table(ps)) > 0) > prevalenceThreshold
ps_filt <- prune_taxa(toKeep, ps)
return(ps_filt)
}
#CSS norm function
#We actually will plot everything with CSS
# Cumulative sum scaling normalization via metagenomeSeq; returns a new
# phyloseq object carrying the normalized counts plus the original
# sample_data and tax_table.
CSS_norm <- function(ps){
ps.metaG <- phyloseq_to_metagenomeSeq(ps)
# data-driven percentile used as the scaling reference
p_stat <- cumNormStatFast(ps.metaG)
ps.metaG <- cumNorm(ps.metaG, p = p_stat)
# norm = TRUE spelled out (T is a reassignable variable, not a keyword)
ps.metaG.norm <- MRcounts(ps.metaG, norm = TRUE)
ps_CSS <- phyloseq(otu_table(ps.metaG.norm, taxa_are_rows = TRUE), sample_data(ps), tax_table(ps))
return(ps_CSS)
}
#Deseq norm
# Variance-stabilizing transform via DESeq2; "poscounts" size factors
# handle the zero-heavy microbiome counts. The VST can yield negative
# values, which are shifted up so no count is negative.
deSeqNorm <- function(ps){
ps_dds <- phyloseq_to_deseq2(ps, ~ phenotype)
ps_dds <- estimateSizeFactors(ps_dds, type = "poscounts")
ps_dds <- estimateDispersions(ps_dds)
abund <- getVarianceStabilizedData(ps_dds)
# BUGFIX: only shift when negatives actually exist. The original added
# abs(min(abund)) unconditionally, which would inflate every value if
# the minimum happened to be positive already.
if (min(abund) < 0) {
abund <- abund + abs(min(abund)) #don't allow deseq to return negative counts
}
ps_deSeq <- phyloseq(otu_table(abund, taxa_are_rows = TRUE), sample_data(ps), tax_table(ps))
return(ps_deSeq)
}
#Now we remove the taxa present in less than 3 % of the samples with some basic filtering
filtered_ps003<-filterTaxaByPrevolence(ps_not_norm_comp, 0.03)
# Print the filtered object summary (858 taxa x 378 samples in this run).
filtered_ps003
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 858 taxa and 378 samples ]
## sample_data() Sample Data: [ 378 samples by 365 sample variables ]
## tax_table() Taxonomy Table: [ 858 taxa by 8 taxonomic ranks ]
# Save the filtered, non-normalized object.
saveRDS(filtered_ps003, file=paste0(output_data, "Normalized/ps_not_norm_comp_pass_min_postDD_min0.03.Rda"))
# CSS normalization
ps_CSS_norm_pass_min_postDD_sup003<-CSS_norm(filtered_ps003)
saveRDS(ps_CSS_norm_pass_min_postDD_sup003, file=paste0(output_data, "Normalized/ps_CSS_pass_min_postDD_min0.03.Rda"))
# DESeq normalization
ps_DeSeq_norm_pass_min_postDD_sup003<-deSeqNorm(filtered_ps003)
saveRDS(ps_DeSeq_norm_pass_min_postDD_sup003, file=paste0(output_data, "Normalized/ps_DeSeq_pass_min_postDD_min0.03.Rda"))
# TSS normalization
# (column-wise relative abundance; each sample's counts sum to 1)
propDF=prop.table(as.matrix(otu_table(filtered_ps003)), margin=2)
ps_TSS_norm_pass_min_postDD_sup003 <- phyloseq(otu_table(propDF, taxa_are_rows=TRUE),
tax_table(filtered_ps003),
sample_data(filtered_ps003))
#format asv table with timepoint + hostname info
# Samples as rows, ASVs as columns; timepoint and Host.Name are appended
# as the last two columns (so ASV columns are 1..(ncol-2)).
asv_table<-t(otu_table(ps_DeSeq_norm_pass_min_postDD_sup003))
asv_table <- as.data.frame(asv_table)
asv_table$timepoint <- ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Within.study.sampling.date
asv_table$timepoint <- as.factor(asv_table$timepoint)
asv_table$Host.Name <- ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Host.Name
#Create initial table of first asv to build off of
# Repeated-measures ANOVA: timepoint effect with host as the error stratum;
# the within-host timepoint p-value is pulled from the Error: Host.Name:timepoint stratum.
tmp <-summary(aov(asv_table[,1] ~ timepoint + Error(Host.Name/timepoint), data = asv_table))
p_val <-tmp$`Error: Host.Name:timepoint`[[1]]$`Pr(>F)`[1]
anova_asv_res <- tibble(colnames(asv_table[1]), p_val)
colnames(anova_asv_res) <- c("ASV", "p_val")
#Run for each asv
# NOTE(review): the formula is built as a string indexing asv_table by
# column position; this works because asv_table is in the calling
# environment when aov() evaluates the formula.
for (i in 2:(length(colnames(asv_table))-2)) {
form <-as.formula(paste0("asv_table[," , i, "]", " ~ timepoint + Error(Host.Name/timepoint)"))
tmp <-summary(aov(formula = form, data = asv_table))
p_val <-tmp$`Error: Host.Name:timepoint`[[1]]$`Pr(>F)`[1]
tmpres <- tibble(colnames(asv_table[i]), p_val)
colnames(tmpres) <- c("ASV", "p_val")
anova_asv_res<- rbind(anova_asv_res, tmpres)
}
#find sig ones btwn timepoints
asv_sig_btwn_timep_DES<-anova_asv_res$ASV[which(anova_asv_res$p_val <= 0.05)]
asv_sig_btwn_timep_DES
## [1] "GCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCAGCAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTACTGAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGG"
## [2] "GCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTAGAGTGCAGAAGAGGGCGATGGAATTCCATGTGTAGCGGTAAAATGCGTAGATATATGGAGGAACACCAGTGGCGAAGGCGGTCGCCTGGTCTGTAACTGACACTGAGGCACGAAAGCGTGGGG"
## [3] "GCAAGCGTTATCCGGATTTACTGGGCGTAAAGGGAGCGTAGGCGGATATTTAAGTGGGATGTGAAATACCTGAGCTTAACTTGGGAGCTGCATTCCAAACTGGATATCTAGAGTGCAGGAGAGGAGAATGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGATTCTCTGGACTGTAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [4] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGCATGATAAGTCTGATGTGAAAACCCAAGGCTCAACCATGGGACTGCATTGGAAACTGTCGTGCTAGAGTGTCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATGACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [5] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGGACTGCAAGTTGGATGTGAAATACCGTGGCTTAACCACGGAACTGCATCCAAAACTGTAGTTCTTGAGTGAAGTAGAGGCAAGCGGAATTCCGAGTGTAGCGGTGAAATGCGTAGATATTCGGAGGAACACCAGTGGCGAAGGCGGCTTGCTGGGCTTTAACTGACGCTGAGGCTCGAAAGTGTGGGG"
## [6] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGTCTGGCAAGTCTGATGTGAAAATCCGGGGCTCAACTCCGGAACTGCATTGGAAACTGTCAGACTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [7] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCAGTGCAAGTCTGAAGTGAAAGCCTGGGGCTCAACCCCGGAACTGCTTTGGAAACTGTGCTGCTTGAGTACCGGAGGGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [8] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCAGTGCAAGTCTGAAGTGAAAGGCTGGGGCTCAACCCCGGAACTGCTTTGGAAACTGTGCTGCTTGAGTACCGGAGGGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [9] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGATGCAAGTCTGGAGTGAAAGCCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTATGGCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACAGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [10] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGTATGGCAAGTCTGATGTGAAAGGCCGGGGCTCAACCCCGGGACTGCATTGGAAACTGCCAGACTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGACAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [11] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGTTATGCAAGTCCGATGTGAAAGCCCGGGGCTTAACCCCGGGACTGCATTAGAAACTGTGTAACTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [12] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGTGCGTAGGTGGTATGGCAAGTCAGAAGTGAAAGGCTGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTCAAACTAGAGTACAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACTGAAACTGACACTGAGGCACGAAAGCGTGGGG"
## [13] "GCAAGCGTTATCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCTCGGCTTAACCGAGGAAGTGCATCGGAAACTGGGAAACTTGAGTACAGAAGAGGACAGTGGAACTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTGTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [14] "GCAAGCGTTATCCGGATTTATTGGGTGTAAAGGGTGCGTAGACGGGAAGGTAAGTTAGTTGTGAAATCCCTCGGCTCAACTGAGGAACTGCGACTAAAACTGCTTTTCTTGAGTGCTGGAGAGGAAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGACAGCAACTGACGTTGAGGCACGAAAGTGTGGGG"
## [15] "GCAAGCGTTGTCCGGAATAATTGGGCGTAAAGGGCGCGTAGGCGGCTCGGTAAGTCTGGAGTGAAAGTCCTGCTTTTAAGGTGGGAATTGCTTTGGATACTGTCGGGCTTGAGTGCAGGAGAGGTTAGTGGAATTCCCAGTGTAGCGGTGAAATGCGTAGAGATTGGGAGGAACACCAGTGGCGAAGGCGACTAACTGGACTGTAACTGACGCTGAGGCGCGAAAGTGTGGGG"
## [16] "GCAAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGCCGTTTAAGTCAGATGTGAAATACCCGTGCTTAACATGGGGGCTGCATTTGAAACTGGATGGCTTGAGTGCAGGAGAGGCAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTGCTGGACTGTAACTGACGCTGAGGCACGAAAGCGTGGGG"
## [17] "GCAAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGCGCGTTAAGTCAGATGTGAAATACCCGTGCTTAACATGGGGGGTGCATCTGATACTGGCGTGCTTGAGTGCAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACTGTAACTGACGCTGAGGCACGAAAGCGTGGGG"
## [18] "GCAAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGCCATGTAAGTCAGGTGTGAAAGACCGGGGCTCAACCCCGGGGTTGCACTTGAAACTGTGTGGCTTGAGTACAGGAGAGGGAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGG"
## [19] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAGAGCAAGTTGGAAGTGAAATCTGTGGGCTCAACTCACAAATTGCTTTCAAAACTGTTTTTCTTGAGTGGTGTAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGGAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCACTAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [20] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGTGATCAAGTCAGCTGTGAAAACTACGGGCTTAACCCGTAGACTGCAGTTGAAACTGTTCATCTTGAGTGAAGTAGAGGTTGGCGGAATTCCGAGTGTAGCGGTGAAATGCGTAGATATTCGGAGGAACACCGGTGGCGAAGGCGGCCAACTGGGCTTTAACTGACGCTGAGGCTCGAAAGTGTGGGG"
## [21] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGTAGGCGGGGAGGCAAGTTGAATGTCTAAACTATCGGCTCAACTGATAGTCGCGTTCAAAACTGCCACTCTTGAGTGCAGTAGAGGTAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCTGTAACTGACGCTGAGGCTCGAAAGCGTGGGT"
## [22] "GCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGTACGTAGGCGGTTTGCTAAGCGCAAGGTGAAAGGCAGTGGCTTAACCATTGTAAGCCTTGCGAACTGACAGACTTGAGTGCAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCGGTGGCGAAGGCGGCTTTCTGGACTGTAACTGACGCTGAGGTACGAAAGCGTGGGG"
## [23] "GCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGTTTCATAAGTCTGTCTTAAAAGTGCGGGGCTTAACCCCGTGAGGGGATGGAAACTATGGAACTGGAGTATCGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAAGAACACCAGTGGCGAAGGCGGCTTTCTGGACGACAACTGACGCTGAGGCGCGAAAGCCAGGGG"
## [24] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGACGGGTTTGCAAGTCAGATGTGAAATACCGCAGCTTAACTGCGGGGCTGCATTTGAAACTGCAAATCTTGAGTGCGGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACCGTAACTGACGTTGAGGCGCGAAAGCGTGGGT"
## [25] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGCCGGAGAGACAAGTCAGATGTGAAATCCGCGGGCTCAACCCGCGAACTGCATTTGAAACTGTTTCCCTTGAGTATCGGAGAGGTAACCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAAGAACACCAGTGGCGAAGGCGGGTTACTGGACGACAACTGACGGTGAGGCGCGAAAGCGTGGGG"
## [26] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGTGCGTAGGCGGATCTGCAAGTCAGGCGTGAAATCCATGGGCTTAACCCATGAACTGCGCTTGAAACTGTGGGTCTTGAGTGAAGTAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCTTTAACTGACGCTGAGGCACGAAAGTGTGGGT"
## [27] "GCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAGAATAAGTCTGATGTGAAAGCCCTCGGCTTAACCGAGGAACTGCATCGGAAACTGTTTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [28] "GCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGTACGTAGGCGGTTTTTTAAGCGAGGGGTATAAGGCAGCGGCTTAACTGCTGTTGGCCCCTCGAACTGGAGGACTTGAGTGTCGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACGACAACTGACGCTGAGGTACGAAAGCGTGGGG"
## [29] "GCGAGCGTTATCCGGAATTATTGGGTGTAAAGGGTGCGTAGGCGGGATGTAAAGTCAGATGTGAAATGCCGCGGCTCAACCGCGGAGCTGCATTTGAAACTTATGTTCTTGAGTGAAGTAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCGGTGGCGAAGGCGGCTTACTGGGCTTAGACTGACGCTGAGGCACGAAAGTGTGGGG"
## [30] "GCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGGCTTTTAAGTCAGCGGTCAAATGTCACGGCTCAACCGTGGCCAGCCGTTGAAACTGCAAGCCTTGAGTCTGCACAGGGCACATGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATCGCGAAGGCATTGTGCCGGGGCATAACTGACGCTGAGGCTCGAAAGTGCGGGT"
## [31] "GCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGCGCGTAGGCGGGACGTCAAGTCAGCGGTAAAAGACTGCAGCTAAACTGTAGCACGCCGTTGAAACTGGCGCCCTCGAGACGAGACGAGGGAGGCGGAACAAGTGAAGTAGCGGTGAAATGCATAGATATCACTTGGAACCCCGATAGCGAAGGCAGCTTCCCAGGCTCGATCTGACGCTGATGCGCGAGAGCGTGGGT"
## [32] "GCGAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGTAGGCGGGATAGCAAGTCAGATGTGAAAACTATGGGCTCAACCTGTAGATTGCATTTGAAACTGTTGTTCTTGAGTGAAGTAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACATCGGTGGCGAAGGCGGCTTACTGGGCTTTTACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [33] "GCGAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTTGTAGGTGGCTGGTTGCGTCTGTCGTGAAAGCTCATGGCTTAACTGTGGGTTTGCGGTGGGTACGGGCTGGCTTGAGTGCAGTAGGGGAGGCTGGAATTCCTGGTGTAGCGGTGGAATGCGCAGATATCAGGAGGAATACCGGTGGCGAAGGCGGGTCTCTGGGCTGTTACTGACACTGAGGAGCGAAAGCATGGGG"
## [34] "GCTAGCGTTATCCGGATTTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGGAGTGAAAGGCTACGGCTTAACCGTAGTAAGCTCTTGAAACTGGGAAACTTGAGTGCAGGAGAGGAAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTAGCGAAGGCGGCTTTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGG"
## [35] "t__217134"
## [36] "t__262755"
## [37] "t__264494"
## [38] "t__266763"
## [39] "t__520"
# Format the ASV table (CSS-normalized counts) with timepoint + host info.
# NOTE(review): abundances come from the CSS-normalized phyloseq while the
# sample metadata is pulled from the DESeq-normalized object — they appear to
# hold the same samples, but confirm the two objects are sample-aligned.
asv_table <- as.data.frame(t(otu_table(ps_CSS_norm_pass_min_postDD_sup003)))
asv_table$timepoint <- as.factor(ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Within.study.sampling.date)
asv_table$Host.Name <- ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Host.Name
# Repeated-measures ANOVA per ASV: timepoint as the within-subject factor,
# host as the error stratum. The last two columns are metadata, not ASVs.
# (Replaces the old grow-by-rbind loop with a preallocated vapply; also avoids
# the fragile `2:(n-2)` sequence that breaks when there are fewer than 4 columns.)
n_asv <- ncol(asv_table) - 2
p_vals <- vapply(seq_len(n_asv), function(i) {
  form <- as.formula(paste0("asv_table[,", i, "] ~ timepoint + Error(Host.Name/timepoint)"))
  fit <- summary(aov(formula = form, data = asv_table))
  # p-value of the timepoint effect within the host:timepoint error stratum
  fit$`Error: Host.Name:timepoint`[[1]]$`Pr(>F)`[1]
}, numeric(1))
anova_asv_res <- tibble(ASV = colnames(asv_table)[seq_len(n_asv)], p_val = p_vals)
# Find ASVs significantly different between timepoints (unadjusted p <= 0.05)
asv_sig_btwn_timep_CSS <- anova_asv_res$ASV[which(anova_asv_res$p_val <= 0.05)]
asv_sig_btwn_timep_CSS
## [1] "GCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCGAAACTGGCAGGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGG"
## [2] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGAAGGCTAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGTACTGCATTGGAAACTGGTCATCTAGAGTGTCGGAGGGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [3] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGATGGCTAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTGGAAACTGGTTATCTTGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGACAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [4] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGTTTGGCAAGTCTGATGTGAAAATCCGGGGCTCAACCCCGGAACTGCATTGGAAACTGTCAGACTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [5] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTGTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [6] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGATGCAAGTCTGGAGTGAAAGCCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTATGGCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACAGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [7] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGTTTGACAAGTCAGAAGTGAAAGCCCGTGGCTCAACTGCGGGACTGCTTTTGAAACTGTGAGACTGGATTGCAGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAAATGACGCTGAGGCTCGAAAGCGTGGGG"
## [8] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGTGCGTAGGTGGCAGTGCAAGTCAGATGTGAAAGGCCGGGGCTCAACCCCGGAGCTGCATTTGAAACTGCTCGGCTAGAGTACAGGAGAGGCAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACTGTTACTGACACTGAGGCACGAAAGCGTGGGG"
## [9] "GCAAGCGTTATCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCTCGGCTTAACCGAGGAAGCGCATCGGAAACTGGGAAACTTGAGTGCAGAAGAGGACAGTGGAACTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTGTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [10] "GCAAGCGTTATCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCTCGGCTTAACCGAGGAAGTGCATCGGAAACTGGGAAACTTGAGTGCAGAAGAGGACAGTGGAACTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTGTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [11] "GCAAGCGTTGTCCGGAATCATTGGGCGTAAAGAGTTCGTAGGCGGCATGTCAAGTCTGGTGTTAAATCCTGAGGCTCAACTTCAGTTCAGCACTGGATACTGGCAAGCTAGAATGCGGTAGAGGTAAAGGGAATTCCTGGTGTAGCGGTGAAATGCGTAGATATCAGGAAGAACACCGGTGGCGTAAGCGCTTTACTGGGCCGTTATTGACGCTGAGGAACGAAAGCCGGGGG"
## [12] "GCAAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGCCGTTTAAGTCAGATGTGAAATACCCGTGCTTAACATGGGGGCTGCATTTGAAACTGGATGGCTTGAGTGCAGGAGAGGCAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTGCTGGACTGTAACTGACGCTGAGGCACGAAAGCGTGGGG"
## [13] "GCAAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGCGCGTTAAGTCAGATGTGAAATACCCGTGCTTAACATGGGGGGTGCATCTGATACTGGCGTGCTTGAGTGCAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACTGTAACTGACGCTGAGGCACGAAAGCGTGGGG"
## [14] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAGAGCAAGTTGGAAGTGAAATCTGTGGGCTCAACTCACAAATTGCTTTCAAAACTGTTTTTCTTGAGTGGTGTAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGGAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCACTAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [15] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGATCGTAAGTTGGGAGTGAAATTCATGGGCTCAACCCATGACCTGCTTTCAAAACTGCGATTCTTGAGTGGTGTAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGGAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCACTAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [16] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGTGATCAAGTCAGCTGTGAAAACTACGGGCTTAACCCGTAGACTGCAGTTGAAACTGTTCATCTTGAGTGAAGTAGAGGTTGGCGGAATTCCGAGTGTAGCGGTGAAATGCGTAGATATTCGGAGGAACACCGGTGGCGAAGGCGGCCAACTGGGCTTTAACTGACGCTGAGGCTCGAAAGTGTGGGG"
## [17] "GCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGTGCGTAGGCGGCCGATCAAGTCAGGTGTGAAAGACCCGTGCTTAACATGGGGGTTGCACTTGAAACTGGTTGGCTTGAGTATGGGAGAGGCAAGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAACACCAGTGGCGAAGGCGGCTTGCTGGACCAAAACTGACGCTGAGGCACGAAAGCGTGGGT"
## [18] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGAGAAGCAAGTCAGAAGTGAAATCCATGGGCTTAACCCATGAACTGCTTTTGAAACTGTTTCCCTTGAGTATCGGAGAGGCAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACGACAACTGACGCTGAGGCGCGAAAGCGTGGGG"
## [19] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGAGATGCAAGTCAGATGTGAAATCCCCGGGCTTAACCCGGGAACTGCATTTGAAACTGTATCCCTTGAGTATCGGAGAGGCAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACGACAACTGACGCTGAGGCGCGAAAGCGTGGGG"
## [20] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGAGATGCAAGTCAGATGTGAAATCCTCGGGCTTAACCCGGGAACTGCATTTGAAACTGTATCCCTTGAGTATCGGAGAGGCAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACGACAACTGACGCTGAGGCGCGAAAGCGTGGGG"
## [21] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGTGCGTAGGCGGCTTTGCAAGTCAGATGTGAAATCTATGGGCTCAACCCATAAACTGCATTTGAAACTGTAGAGCTTGAGTGAAGTAGAGGCAGGCGGAATTCCCCGTGTAGCGGTGAAATGCGTAGAGATGGGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGGCTTTAACTGACGCTGAGGCACGAAAGCGTGGGT"
## [22] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGTGCGTAGGCGGTTCGGCAAGTCAGAAGTGAAATCCATGGGCTTAACCCATGAACTGCTTTTGAAACTGTCGAACTTGAGTGAAGTAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCTTTAACTGACGCTGAGGCACGAAAGCATGGGT"
## [23] "GCGAGCGTTATCCGGAATTATTGGGTGTAAAGCGTGTGTAGGCGGGAAATTAAGTCTAAGGTCTAAGCCCGGAGCTCAACTCCGGTTCGCCTTAGAAACTGATTTTCTTGAGTGTGGTAGAGGCAAACGGAATTTCTAGTGTAGCGGTAAAATGCGTAGATATTAGAAGGAACACCAGTGGCGAAGGCGGTTTGCTGGGCCACTACTGACGCTGAGACACGAAAGCGTGGGG"
## [24] "t__262755"
## [25] "t__520"
## [26] "t__81974"
#7 in common
asv_sig_btwn_timep_DES[asv_sig_btwn_timep_DES %in% asv_sig_btwn_timep_CSS]
## [1] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGATGCAAGTCTGGAGTGAAAGCCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTATGGCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACAGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [2] "GCAAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGCCGTTTAAGTCAGATGTGAAATACCCGTGCTTAACATGGGGGCTGCATTTGAAACTGGATGGCTTGAGTGCAGGAGAGGCAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGACTTGCTGGACTGTAACTGACGCTGAGGCACGAAAGCGTGGGG"
## [3] "GCAAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGCGCGTTAAGTCAGATGTGAAATACCCGTGCTTAACATGGGGGGTGCATCTGATACTGGCGTGCTTGAGTGCAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACTGTAACTGACGCTGAGGCACGAAAGCGTGGGG"
## [4] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAGAGCAAGTTGGAAGTGAAATCTGTGGGCTCAACTCACAAATTGCTTTCAAAACTGTTTTTCTTGAGTGGTGTAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGGAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCACTAACTGACGCTGAGGCTCGAAAGCATGGGT"
## [5] "GCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGTGATCAAGTCAGCTGTGAAAACTACGGGCTTAACCCGTAGACTGCAGTTGAAACTGTTCATCTTGAGTGAAGTAGAGGTTGGCGGAATTCCGAGTGTAGCGGTGAAATGCGTAGATATTCGGAGGAACACCGGTGGCGAAGGCGGCCAACTGGGCTTTAACTGACGCTGAGGCTCGAAAGTGTGGGG"
## [6] "t__262755"
## [7] "t__520"
# Remove the timepoint-confounded taxa from each phyloseq object
# (they are treated as noise for the downstream case/control comparisons).
keep_raw <- !(taxa_names(filtered_ps003) %in% asv_sig_btwn_timep_DES)
filtered_ps003 <- prune_taxa(filtered_ps003, taxa = taxa_names(filtered_ps003)[keep_raw])
keep_css <- !(taxa_names(ps_CSS_norm_pass_min_postDD_sup003) %in% asv_sig_btwn_timep_CSS)
ps_CSS_norm_pass_min_postDD_sup003 <- prune_taxa(ps_CSS_norm_pass_min_postDD_sup003, taxa = taxa_names(ps_CSS_norm_pass_min_postDD_sup003)[keep_css])
keep_des <- !(taxa_names(ps_DeSeq_norm_pass_min_postDD_sup003) %in% asv_sig_btwn_timep_DES)
ps_DeSeq_norm_pass_min_postDD_sup003 <- prune_taxa(ps_DeSeq_norm_pass_min_postDD_sup003, taxa = taxa_names(ps_DeSeq_norm_pass_min_postDD_sup003)[keep_des])
# Identify bacterial and archaeal taxa (genera, species and strains) whose abundance is observed significantly more or less in the ASD group
# Ensure the DESeq output directory exists (no warning if it already does).
dir.create(paste0(output_data, 'DESeq/'), showWarnings = FALSE)
### Run DESeq2 proper (not just the normalization, the whole pipeline).
# ps:   phyloseq object whose sample data has a 'phenotype' variable
#       with levels including "N" and "A"
# dcut: adjusted-p-value cutoff for calling a taxon significant
# Returns list(full results table, data.frame of significant rows).
# When nothing passes the cutoff, the second element is a 1-row placeholder
# (rowname "nothing", padj = 1) so downstream code always gets a data.frame.
runDESeq <- function(ps, dcut) {
  diagdds <- phyloseq_to_deseq2(ps, ~ phenotype)
  # "poscounts" size factors tolerate the zero-heavy counts of ASV tables
  diagdds <- estimateSizeFactors(diagdds, type = "poscounts")
  diagdds <- DESeq(diagdds, fitType = "parametric", betaPrior = FALSE)
  # NOTE(review): contrast here is ("N", "A") while runDESeq_time uses
  # ("A", "N"), so fold-change signs are flipped between the two — confirm intended.
  res <- results(diagdds, contrast = c("phenotype", "N", "A"))
  # Treat NA adjusted p-values (independent-filtering dropouts) as non-significant
  res$padj[is.na(res$padj)] <- 1
  sig <- res[res$padj < dcut, ]
  if (nrow(sig) == 0) {
    sigtab <- as.data.frame(1, row.names = "nothing")
    colnames(sigtab) <- 'padj'
  } else {
    sigtab <- data.frame(sig)
  }
  return(list(res, sigtab))
}
### Running analysis
### Split the data into the 3 real sampling timepoints
sd_all <- sample_data(filtered_ps003)
P1 <- prune_samples(rownames(sd_all)[sd_all$Within.study.sampling.date == "Timepoint 1"], filtered_ps003)
P2 <- prune_samples(rownames(sd_all)[sd_all$Within.study.sampling.date == "Timepoint 2"], filtered_ps003)
P3 <- prune_samples(rownames(sd_all)[sd_all$Within.study.sampling.date == "Timepoint 3"], filtered_ps003)
# Several significant taxa per timepoint
deseq_res_P1 <- runDESeq(P1, deseq_cut)
deseq_res_P2 <- runDESeq(P2, deseq_cut)
deseq_res_P3 <- runDESeq(P3, deseq_cut)
# Print the significant-taxa tables
datatable(deseq_res_P1[[2]])
datatable(deseq_res_P2[[2]])
datatable(deseq_res_P3[[2]])
# "ASV_1669" is present twice: timepoint 1 and timepoint 3
# Save the per-timepoint results
saveRDS(deseq_res_P1, file = paste0(output_data, "DESeq/deseq_res_P1_adjp", deseq_cut, ".Rda"))
saveRDS(deseq_res_P2, file = paste0(output_data, "DESeq/deseq_res_P2_adjp", deseq_cut, ".Rda"))
saveRDS(deseq_res_P3, file = paste0(output_data, "DESeq/deseq_res_P3_adjp", deseq_cut, ".Rda"))
# Working with time series: include the sampling date in the design so the
# phenotype effect is estimated while accounting for timepoint.
# NOTE(review): the original comment referenced the DESeq2 vignette's LRT
# (full vs reduced design), but this function runs the default Wald test on
# the phenotype coefficient — no reduced model is fitted. Confirm which was intended.
# ps:   phyloseq object with 'phenotype' and 'Within.study.sampling.date' sample variables
# dcut: adjusted-p-value cutoff
# Returns list(full results table, data.frame of significant rows); a 1-row
# placeholder (rowname "nothing", padj = 1) stands in when nothing is significant.
runDESeq_time <- function(ps, dcut) {
  diagdds <- phyloseq_to_deseq2(ps, ~ phenotype + Within.study.sampling.date)
  diagdds <- estimateSizeFactors(diagdds, type = "poscounts")
  diagdds <- DESeq(diagdds, fitType = "parametric", betaPrior = FALSE)
  # resultsNames(diagdds) can be used to inspect the available contrasts.
  # Contrast ("A", "N") — opposite sign convention to runDESeq's ("N", "A").
  res <- results(diagdds, contrast = c("phenotype", "A", "N"))
  # Treat NA adjusted p-values (independent-filtering dropouts) as non-significant
  res$padj[is.na(res$padj)] <- 1
  sig <- res[res$padj < dcut, ]
  if (nrow(sig) == 0) {
    sigtab <- as.data.frame(1, row.names = "nothing")
    colnames(sigtab) <- 'padj'
  } else {
    sigtab <- data.frame(sig)
  }
  return(list(res, sigtab))
}
# And this time factoring in the time component for the longitudinal study
bla <- runDESeq_time(filtered_ps003, deseq_cut)
saveRDS(bla, file = paste0(output_data, "DESeq/Deseq_time_interaction_adjp", deseq_cut, ".Rda"))
# bla[[2]] is the significant-taxa table (same element the old bla[2][[1]] extracted)
datatable(bla[[2]])
# Significant ASVs
row.names(bla[[2]])
## [1] "ACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCTATGGGCTTAACCCATAAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGGAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCACCAACTGACGCTGAGGCTCGAAAGTGTGGGT"
## [2] "t__262551"
# Attach the taxonomy of the significant ASVs
des_res <- cbind(row.names(bla[[2]]), as(tax_table(filtered_ps003)[row.names(bla[[2]]), ], "matrix"))
des_res_esv <- des_res
rownames(des_res) <- NULL
des_res
##
## [1,] "ACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCTATGGGCTTAACCCATAAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGGAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCCTACTGGGCACCAACTGACGCTGAGGCTCGAAAGTGTGGGT"
## [2,] "t__262551"
## Domain Phylum Class Order
## [1,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia" "o__Oscillospirales"
## [2,] "d__Bacteria" "p__Firmicutes" "c__Bacilli" "o__Erysipelotrichales"
## Family Genus Species
## [1,] "f__Ruminococcaceae" "g__Faecalibacterium" "s__unclassified"
## [2,] "f__Erysipelotrichaceae" "g__Holdemanella" "s__Holdemanella__biformis"
## Strain
## [1,] "t__unclassified"
## [2,] "t__262551"
# Ensure the metagenomeSeq output directory exists (no warning if it already does).
dir.create(paste0(output_data, 'metagenomseq/'), showWarnings = FALSE)
### Fit the metagenomeSeq zero-inflated Gaussian (ZIG) model and filter hits.
# ps:    phyloseq object with a 'phenotype' sample variable
# maxit: maximum EM iterations passed to fitZig
# mcut:  adjusted-p-value cutoff
# Returns list(unfiltered MRtable, filtered MRtable with a Min_sample column).
run_metagenom_seq <- function(ps, maxit, mcut){
  p_metag <- phyloseq_to_metagenomeSeq(ps)
  # CSS normalization at the 0.75 quantile
  p_metag <- cumNorm(p_metag, p = 0.75)
  normFactor <- normFactors(p_metag)
  normFactor <- log2(normFactor / median(normFactor) + 1)
  #mod = model.matrix(~ASDorNeuroT +PairASD+ normFactor)
  mod <- model.matrix(~phenotype + normFactor, data = pData(p_metag))
  settings <- zigControl(maxit = maxit, verbose = FALSE)
  #settings =zigControl(tol = 1e-5, maxit = 30, verbose = TRUE, pvalMethod = 'bootstrap')
  fit <- fitZig(obj = p_metag, mod = mod, useCSSoffset = FALSE, control = settings)
  # Note: changed fit$taxa to fit@taxa in light of error (probably from newer metagenomeSeq ver.)
  res_fit <- MRtable(fit, number = length(fit@taxa))
  res_fit_nonfiltered <- copy(res_fit)
  res_fit <- res_fit[res_fit$adjPvalues < mcut, ]
  # Manually remove rows whose names contain "NA" BEFORE aligning the
  # effective-sample sizes.
  res_fit <- res_fit[grep("NA", rownames(res_fit), inv = T), ]
  # BUGFIX (the "#####there is a bug here" line): the effective-sample vector
  # used to be subset before the "NA" rows were dropped, so its length/order
  # could disagree with res_fit when assigned as a column. Indexing by rowname
  # guarantees per-taxon alignment.
  eff_samp <- calculateEffectiveSamples(fit)
  res_fit$Min_sample <- eff_samp[rownames(res_fit)]
  # Keep taxa observed in at least the effective number of samples in both groups
  res_fit <- res_fit[res_fit$`+samples in group 0` >= res_fit$Min_sample & res_fit$`+samples in group 1` >= res_fit$Min_sample, ]
  return(list(res_fit_nonfiltered, res_fit))
}
### Same ZIG fit as run_metagenom_seq, but with the sampling date added to the
### design so the phenotype effect is adjusted for timepoint (longitudinal use).
# ps:    phyloseq object with 'phenotype' and 'Within.study.sampling.date' sample variables
# maxit: maximum EM iterations passed to fitZig
# mcut:  adjusted-p-value cutoff
# Returns list(unfiltered MRtable, filtered MRtable with a Min_sample column).
run_metagenom_seq2 <- function(ps, maxit, mcut){
  p_metag <- phyloseq_to_metagenomeSeq(ps)
  # CSS normalization at the 0.75 quantile
  p_metag <- cumNorm(p_metag, p = 0.75)
  normFactor <- normFactors(p_metag)
  normFactor <- log2(normFactor / median(normFactor) + 1)
  #mod = model.matrix(~ASDorNeuroT +PairASD+ normFactor)
  mod <- model.matrix(~phenotype + Within.study.sampling.date + normFactor, data = pData(p_metag))
  settings <- zigControl(maxit = maxit, verbose = FALSE)
  #settings =zigControl(tol = 1e-5, maxit = 30, verbose = TRUE, pvalMethod = 'bootstrap')
  fit <- fitZig(obj = p_metag, mod = mod, useCSSoffset = FALSE, control = settings)
  # Note: changed fit$taxa to fit@taxa in light of error (probably from newer metagenomeSeq ver.)
  res_fit <- MRtable(fit, number = length(fit@taxa))
  res_fit_nonfiltered <- copy(res_fit)
  res_fit <- res_fit[res_fit$adjPvalues < mcut, ]
  # Manually remove rows whose names contain "NA" BEFORE aligning the
  # effective-sample sizes.
  res_fit <- res_fit[grep("NA", rownames(res_fit), inv = T), ]
  # BUGFIX (same as run_metagenom_seq): subset the effective-sample vector by
  # rowname AFTER the "NA" rows are dropped so the column assignment is aligned.
  eff_samp <- calculateEffectiveSamples(fit)
  res_fit$Min_sample <- eff_samp[rownames(res_fit)]
  # Keep taxa observed in at least the effective number of samples in both groups
  res_fit <- res_fit[res_fit$`+samples in group 0` >= res_fit$Min_sample & res_fit$`+samples in group 1` >= res_fit$Min_sample, ]
  return(list(res_fit_nonfiltered, res_fit))
}
# Now run the ZIG model for each timepoint separately, and once across all
# timepoints with the sampling date in the design (run_metagenom_seq2).
sd_meta <- sample_data(filtered_ps003)
P1 <- prune_samples(rownames(sd_meta)[sd_meta$Within.study.sampling.date == "Timepoint 1"], filtered_ps003)
P2 <- prune_samples(rownames(sd_meta)[sd_meta$Within.study.sampling.date == "Timepoint 2"], filtered_ps003)
P3 <- prune_samples(rownames(sd_meta)[sd_meta$Within.study.sampling.date == "Timepoint 3"], filtered_ps003)
# 30 = the maximum number of iterations for the expectation-maximization algorithm
zig_res_P1 <- run_metagenom_seq(P1, 30, mtgseq_cut)
zig_res_P2 <- run_metagenom_seq(P2, 30, mtgseq_cut)
zig_res_P3 <- run_metagenom_seq(P3, 30, mtgseq_cut)
zig_res_all <- run_metagenom_seq2(filtered_ps003, 30, mtgseq_cut)
# Print the significant-taxa tables
datatable(zig_res_P1[[2]])
datatable(zig_res_P2[[2]])
datatable(zig_res_P3[[2]])
datatable(zig_res_all[[2]])
# Attach taxonomy and label enrichment from the sign of the phenotypeN
# coefficient (negative -> "Aut", otherwise "Control")
zig_res_P1_filtered <- data.frame(cbind(zig_res_P1[[2]], tax_table(P1)[rownames(zig_res_P1[[2]]), ]))
zig_res_P1_filtered$enriched <- ifelse(zig_res_P1_filtered$phenotypeN < 0, "Aut", "Control")
zig_res_P2_filtered <- data.frame(cbind(zig_res_P2[[2]], tax_table(P2)[rownames(zig_res_P2[[2]]), ]))
zig_res_P2_filtered$enriched <- ifelse(zig_res_P2_filtered$phenotypeN < 0, "Aut", "Control")
zig_res_P3_filtered <- data.frame(cbind(zig_res_P3[[2]], tax_table(P3)[rownames(zig_res_P3[[2]]), ]))
zig_res_P3_filtered$enriched <- ifelse(zig_res_P3_filtered$phenotypeN < 0, "Aut", "Control")
zig_res_all_filtered <- data.frame(cbind(zig_res_all[[2]], tax_table(filtered_ps003)[rownames(zig_res_all[[2]]), ]))
zig_res_all_filtered$enriched <- ifelse(zig_res_all_filtered$phenotypeN < 0, "Aut", "Control")
# Save the per-timepoint results
saveRDS(zig_res_P1, file = paste0(output_data, "metagenomseq/zig_res_P1_adjp", mtgseq_cut, ".rds"))
saveRDS(zig_res_P2, file = paste0(output_data, "metagenomseq/zig_res_P2_adjp", mtgseq_cut, ".rds"))
saveRDS(zig_res_P3, file = paste0(output_data, "metagenomseq/zig_res_P3_adjp", mtgseq_cut, ".rds"))
# Do we have any ESVs in common between the per-timepoint results?
# (Exploratory checks, left commented out.)
#Reduce(intersect, list(rownames(zig_res_P1_filtered),rownames(zig_res_P2_filtered),rownames(zig_res_P3_filtered)))
#rownames(zig_res_P1[[2]])[which(rownames(zig_res_P1[[2]]) %in% c(rownames(zig_res_P2[[2]]), rownames(zig_res_P3[[2]])))]
#rownames(zig_res_P2[[2]])[which(rownames(zig_res_P2[[2]]) %in% rownames(zig_res_P3[[2]]))]
# Attach taxonomy to the all-timepoints significant taxa (zig_res_all[[2]]);
# first column is the ESV sequence / strain id, remaining columns the tax_table ranks.
metag_res<-cbind(rownames(zig_res_all[[2]]), as(tax_table(filtered_ps003)[rownames(zig_res_all[[2]]), ], "matrix"))
# Keep a copy with ESV rownames before stripping them for printing
metag_res_esv <-metag_res
rownames(metag_res) <- NULL
metag_res
##
## [1,] "GCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTGGTGATTTAAGTCAGCGGTGAAAGTTTGTGGCTCAACCATAAAATTGCCGTTGAAACTGGGTTACTTGAGTGTGTTTGAGGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCTTACTAAACCATAACTGACACTGAAGCACGAAAGCGTGGGG"
## [2,] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGTGCGTAGGCGGATTGGCAAGTCAGTAGTGAAATCCATGGGCTTAACCCATGACGTGCTATTGAAACTGTTGATCTTGAGTGAAGTAGAGGTAAGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGGCTTTAACTGACGCTGAGGCACGAAAGCATGGGT"
## [3,] "GCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGAGATGCAAGTCAGATGTGAAATCCTCGGGCTTAACCCGGGAACTGCATTTGAAACTGTATCCCTTGAGTATCGGAGAGGCAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACGACAACTGACGCTGAGGCGCGAAAGCGTGGGG"
## [4,] "GCAAGCGTTATCCGGAATGACTGGGCGTAAAGGGTGCGTAGGTGGTTTGTCAAGTTGGCAGCGTAATTCCGTGGCTTAACCGCGGAACTACTGCCAAAACTGATAGGCTTGAGTGCGGCAGGGGTATGTGGAATTCCTAGTGTAGCGGTGGAATGCGTAGATATTAGGAGGAACACCGGTGGCGAAAGCGACATACTGGGCCGTAACTGACACTGAAGCACGAAAGCGTGGGG"
## [5,] "t__186843"
## [6,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAGGCAAGTCTGATGTGAAAACCCGGGGCTCAACCCCGTGACTGCATTGGAAACTGTTTTGCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGCAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [7,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGTGTAGGTGGTATCACAAGTCAGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTGGAACTGGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACACTGAGGCTCGAAAGCGTGGGG"
## [8,] "GCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCCGGCAGGCCGGGGGTCGAAGCGGGGGGCTCAACCCCCCGAAGCCCCCGGAACCTCCGCGGCTTGGGTCCGGTAGGGGAGGGTGGAACACCCGGTGTAGCGGTGGAATGCGCAGATATCGGGTGGAACACCGGTGGCGAAGGCGGCCCTCTGGGCCGAGACCGACGCTGAGGCGCGAAAGCTGGGGG"
## [9,] "TCAAGCGTTGTTCGGAATCACTGGGCGTAAAGCGTGCGTAGGCTGTTTCGTAAGTCGTGTGTGAAAGGCGCGGGCTCAACCCGCGGACGGCACATGATACTGCGAGACTAGAGTAATGGAGGGGGAACCGGAATTCTCGGTGTAGCAGTGAAATGCGTAGATATCGAGAGGAACACTCGTGGCGAAGGCGGGTTCCTGGACATTAACTGACGCTGAGGCACGAAGGCCAGGGG"
## [10,] "GCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATTGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAAGAACACCAGTGGCGAAGGCGACTTTCTGGACGAAAACTGACGCTGAGGCGCGAAAGCCAGGGG"
## [11,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAATGGCAAGTCTGATGTGAAAGGCCGGGGCTCAACCCCGGGACTGCATTGGAAACTGTCAATCTAGAGTACCGGAGGGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [12,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGACTGGCAAGTCTGATGTGAAAGGCGGGGGCTCAACCCCTGGACTGCATTGGAAACTGTTAGTCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [13,] "CCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTGATAAGTCTGAAGTTAAAGGCTGTGGCTCAACCATAGTTCGCTTTGGAAACTGTCAAACTTGAGTGCAGAAGGGGAGAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACCGGTGGCGAAAGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCGTGGGG"
## [14,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGGAGTGCAAGTCAGATGTGAAAACTATGGGCTCAACCCATAGCCTGCATTTGAAACTGTACTTCTTGAGTGATGGAGAGGCAGGCGGAATTCCCTGTGTAGCGGTGAAATGCGTAGATATAGGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACATTAACTGACGCTGAGGCGCGAAAGCGTGGGG"
## [15,] "CCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTAGACTGCAACTGACACTGATGCTCGAAAGTGTGGGT"
## [16,] "t__258798"
## [17,] "GCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTTAGTGGTTAAATATTTGAGCTAAACTCAATTGTGCCATTAATACTGGTAAACTGGAGTACAGACGAGGTAGGCGGAATAAGTTAAGTAGCGGTGAAATGCATAGATATAACTTAGAACTCCGATAGCGAAGGCAGCTTACCAGACTGTAACTGACGCTGATGCACGAGAGCGTGGGT"
## [18,] "CCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACAGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGCTGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGT"
## [19,] "GCGAGCGTTAATCGGAATAACTGGGCGTAAAGGGCACGCAGGCGGTGACTTAAGTGAGGTGTGAAAGCCCCGGGCTTAACCTGGGAATTGCATTTCATACTGGGTCGCTAGAGTACTTTAGGGAGGGGTAGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAATACCGAAGGCGAAGGCAGCCCCTTGGGAATGTACTGACGCTCATGTGCGAAAGCGTGGGG"
## [20,] "GCGAGCGTTATCCGGAATTACTGGGTGTAAAGGGTGCGTAGGCGGCACCGTAAGTCTGTTGTGAAAGGCGATGGCTTAACCATCGAAGTGCAATGGAAACTGCGGAGCTAGAGTGCAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAAGAACACCAGTGGCGAAGGCGGCTTTCTGGACTGCAACTGACGCTGAGGCACGAAAGCGTGGGG"
## [21,] "GCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGCGCGCAGGTGGTTGATTAAGTCTGATGTGAAAGCCCACGGCTTAACCGTGGAGGGTCATTGGAAACTGGTCAACTTGAGTGCAGAAGAGGGAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGGCTTCCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGG"
## [22,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTGTGGCAAGTCTGATGTGAAAGGCATGGGCTCAACCTGTGGACTGCATTGGAAACTGTCATACTTGAGTGCCGGAGGGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTAACTGACGTTGAGGCTCGAAAGCGTGGGG"
## [23,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGTCTGACAAGTCAGAAGTGAAAGCCCGGGGCTCAACTCCGGGACTGCTTTTGAAACTGCCGGACTAGATTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAAATGACGCTGAGGCTCGAAAGCGTGGGG"
## [24,] "t__220541"
## [25,] "t__209626"
## [26,] "GCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGTGCGTAGGTGGCAGTGCAAGTCAGATGTGAAAGGCCGGGGCTCAACCCCGGAGCTGCATTTGAAACTGCATAGCTAGAGTACAGGAGAGGCAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACTGTTACTGACACTGAGGCACGAAAGCGTGGGG"
## [27,] "GCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGTGCGTAGGCGGTCTGAAAAGTCGGATGTGAAATCCCCGTGCTTAACATGGGAGCTGCATTCGAAACTTTCGGACTTGAGTGTCGGAGAGGTAAGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGATATCGGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGACAACTGACGCTGAGGCACGAAAGCGTGGGG"
## Domain Phylum Class
## [1,] "d__Bacteria" "p__Bacteroidota" "c__Bacteroidia"
## [2,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [3,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [4,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [5,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [6,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [7,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [8,] "d__Bacteria" "p__Actinobacteriota" "c__Coriobacteriia"
## [9,] "d__Bacteria" "p__Verrucomicrobiota" "c__Verrucomicrobiae"
## [10,] "d__Bacteria" "p__Firmicutes_C" "c__Negativicutes"
## [11,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [12,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [13,] "d__Bacteria" "p__Firmicutes" "c__Bacilli"
## [14,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [15,] "d__Bacteria" "p__Bacteroidota" "c__Bacteroidia"
## [16,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [17,] "d__Bacteria" "p__Bacteroidota" "c__Bacteroidia"
## [18,] "d__Bacteria" "p__Bacteroidota" "c__Bacteroidia"
## [19,] "d__Bacteria" "p__Proteobacteria" "c__Gammaproteobacteria"
## [20,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [21,] "d__Bacteria" "p__Firmicutes" "c__Bacilli"
## [22,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [23,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [24,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [25,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [26,] "d__Bacteria" "p__Firmicutes_A" "c__Clostridia"
## [27,] "d__Bacteria" "p__Firmicutes_A" "c__unclassified"
## Order Family
## [1,] "o__Bacteroidales" "f__Tannerellaceae"
## [2,] "o__Oscillospirales" "f__Acutalibacteraceae"
## [3,] "o__Oscillospirales" "f__Oscillospiraceae"
## [4,] "o__unclassified" "f__unclassified"
## [5,] "o__Christensenellales" "f__Christensenellaceae"
## [6,] "o__Lachnospirales" "f__Lachnospiraceae"
## [7,] "o__Lachnospirales" "f__Lachnospiraceae"
## [8,] "o__Coriobacteriales" "f__Coriobacteriaceae"
## [9,] "o__Verrucomicrobiales" "f__Akkermansiaceae"
## [10,] "o__Veillonellales" "f__Veillonellaceae"
## [11,] "o__Lachnospirales" "f__Lachnospiraceae"
## [12,] "o__Lachnospirales" "f__Lachnospiraceae"
## [13,] "o__Lactobacillales" "f__Streptococcaceae"
## [14,] "o__Oscillospirales" "f__Oscillospiraceae"
## [15,] "o__Bacteroidales" "f__Bacteroidaceae"
## [16,] "o__Oscillospirales" "f__Acutalibacteraceae"
## [17,] "o__Bacteroidales" "f__Marinifilaceae"
## [18,] "o__Bacteroidales" "f__Bacteroidaceae"
## [19,] "o__Enterobacterales" "f__Pasteurellaceae"
## [20,] "o__unclassified" "f__unclassified"
## [21,] "o__Haloplasmatales" "f__Turicibacteraceae"
## [22,] "o__Lachnospirales" "f__Lachnospiraceae"
## [23,] "o__Lachnospirales" "f__Lachnospiraceae"
## [24,] "o__Lachnospirales" "f__Lachnospiraceae"
## [25,] "o__Lachnospirales" "f__Lachnospiraceae"
## [26,] "o__Lachnospirales" "f__Lachnospiraceae"
## [27,] "o__unclassified" "f__unclassified"
## Genus Species
## [1,] "g__Parabacteroides" "s__Parabacteroides__merdae"
## [2,] "g__unclassified" "s__unclassified"
## [3,] "g__unclassified" "s__unclassified"
## [4,] "g__unclassified" "s__unclassified"
## [5,] "g__PROV_t__186843" "s__PROV_t__186843"
## [6,] "g__unclassified" "s__unclassified"
## [7,] "g__unclassified" "s__unclassified"
## [8,] "g__Collinsella" "s__unclassified"
## [9,] "g__Akkermansia" "s__Akkermansia__muciniphila"
## [10,] "g__Veillonella" "s__unclassified"
## [11,] "g__Ruminococcus_A" "s__unclassified"
## [12,] "g__Blautia_A" "s__unclassified"
## [13,] "g__Streptococcus" "s__unclassified"
## [14,] "g__Intestinimonas" "s__Intestinimonas__butyriciproducens"
## [15,] "g__Bacteroides" "s__unclassified"
## [16,] "g__Anaeromassilibacillus" "s__PROV_t__258798"
## [17,] "g__Odoribacter" "s__Odoribacter__splanchnicus"
## [18,] "g__Bacteroides" "s__Bacteroides__thetaiotaomicron"
## [19,] "g__Haemophilus_D" "s__unclassified"
## [20,] "g__unclassified" "s__unclassified"
## [21,] "g__Turicibacter" "s__unclassified"
## [22,] "g__Blautia_A" "s__Blautia_A__wexlerae"
## [23,] "g__PROV_t__182732" "s__unclassified"
## [24,] "g__Blautia_A" "s__PROV_t__220541"
## [25,] "g__PROV_t__209626" "s__PROV_t__209626"
## [26,] "g__Eubacterium_E" "s__Eubacterium_E__hallii_A"
## [27,] "g__unclassified" "s__unclassified"
## Strain
## [1,] "t__unclassified"
## [2,] "t__unclassified"
## [3,] "t__unclassified"
## [4,] "t__unclassified"
## [5,] "t__186843"
## [6,] "t__unclassified"
## [7,] "t__unclassified"
## [8,] "t__unclassified"
## [9,] "t__unclassified"
## [10,] "t__unclassified"
## [11,] "t__unclassified"
## [12,] "t__unclassified"
## [13,] "t__unclassified"
## [14,] "t__unclassified"
## [15,] "t__unclassified"
## [16,] "t__258798"
## [17,] "t__unclassified"
## [18,] "t__unclassified"
## [19,] "t__unclassified"
## [20,] "t__unclassified"
## [21,] "t__unclassified"
## [22,] "t__unclassified"
## [23,] "t__unclassified"
## [24,] "t__220541"
## [25,] "t__209626"
## [26,] "t__unclassified"
## [27,] "t__unclassified"
#ANCOM_old ver (2.1 can be seen after)
#retrive taxa
#format metadata
#metada_ps<-sample_data(filtered_ps003)
#metada_ps<-as.data.frame(metada_ps)
#res_table_filt<-otu_table(filtered_ps003)
#res_table_filt<-as.data.frame(res_table_filt)
#res_table_filt<-t(res_table_filt)
#res_table_filt<-as.data.frame(res_table_filt)
#res_table_filt$metada<-metada_ps$phenotype[rownames(metada_ps)%in%rownames(res_table_filt)]
#ancom.all.filt.0.05 <- ANCOM(res_table_filt, sig = 0.05, multcorr = 2)
#saveRDS(ancom.all.filt.0.05, file=paste0(output_data, "ANCOM/ancom_res", mtgseq_cut, ".rds"))
#Format the Detected Table w/ Taxa
#main.05 <- cbind(ancom.all.filt.0.05$detected, as(tax_table(filtered_ps003)[ancom.all.filt.0.05$detected, ], "matrix"))
#main.05esv <- main.05
#row.names(main.05) <- NULL
#main.05
# New ANCOM 2.1
# Retrieve taxa and format the metadata table ANCOM expects:
# one row per sample with host name, phenotype and sample barcode.
metada_ps <- sample_data(filtered_ps003)
metada_ps <- as.data.frame(metada_ps)
metada_ps$HostName <- as.character(metada_ps$Host.Name)
metada_ps$phenotype <- as.character(metada_ps$phenotype)
metada_ps <- tibble(metada_ps$HostName, metada_ps$phenotype, rows = rownames(metada_ps))
colnames(metada_ps) <- c("HostName", "phenotype", "Biospecimen.Barcode")
res_table_filt <- otu_table(filtered_ps003)
res_table_filt <- as.data.frame(res_table_filt)
prepro <- feature_table_pre_process(feature_table = res_table_filt, meta_data = metada_ps, sample_var = "Biospecimen.Barcode", group_var = "phenotype", out_cut = 0.05, zero_cut = 0.90, lib_cut = 1000, neg_lb = FALSE)
feature_table <- prepro$feature_table # Preprocessed feature table
meta_data <- prepro$meta_data # Preprocessed metadata
struc_zero <- prepro$structure_zeros # Structural zero info
main_var <- "phenotype"; p_adj_method <- "BH"; alpha <- 0.05
# Random intercept per host for the repeated measures across timepoints
adj_formula <- NULL; rand_formula <- "~ 1 | HostName"; control <- list(msMaxIter = 50)
ancom.all.filt.0.05 <- ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)#, control)
# BUGFIX: create the ANCOM directory under output_data — where saveRDS writes —
# rather than in the current working directory (dir.create(path = "ANCOM")),
# which left the save path nonexistent.
dir.create(paste0(output_data, "ANCOM/"), showWarnings = FALSE)
saveRDS(ancom.all.filt.0.05, file=paste0(output_data, "ANCOM/ancom_res", mtgseq_cut, ".rds"))
# Format the detected table w/ taxa
ancom.all.filt.0.05$detected
## NULL
#no significant taxa from ANCOM
#main.05 <- cbind(ancom.all.filt.0.05$detected, as(tax_table(filtered_ps003)[ancom.all.filt.0.05$detected, ], "matrix"))
#main.05esv <- main.05
#row.names(main.05) <- NULL
#main.05
# Trying ANCOM separately for each timepoint
# --- Timepoint 3 ---
ps3 <- subset_samples(filtered_ps003, Within.study.sampling.date == "Timepoint 3")
# Build the ANCOM metadata table: host name, phenotype and sample barcode
metada_ps <- as.data.frame(sample_data(ps3))
metada_ps$HostName <- as.character(metada_ps$Host.Name)
metada_ps$phenotype <- as.character(metada_ps$phenotype)
metada_ps <- tibble(metada_ps$HostName, metada_ps$phenotype, rows = rownames(metada_ps))
colnames(metada_ps) <- c("HostName", "phenotype", "Biospecimen.Barcode")
res_table_filt <- as.data.frame(otu_table(ps3))
prepro <- feature_table_pre_process(
  feature_table = res_table_filt,
  meta_data = metada_ps,
  sample_var = "Biospecimen.Barcode",
  group_var = "phenotype",
  out_cut = 0.05, zero_cut = 0.90, lib_cut = 1000, neg_lb = FALSE
)
feature_table <- prepro$feature_table   # preprocessed feature table
meta_data <- prepro$meta_data           # preprocessed metadata
struc_zero <- prepro$structure_zeros    # structural zero info
main_var <- "phenotype"
p_adj_method <- "BH"
alpha <- 0.05
adj_formula <- NULL
rand_formula <- NULL  # no random host effect within a single timepoint
ancom.all.filt.0.05.3 <- ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)#, control)
#dir.create(path = "ANCOM")
saveRDS(ancom.all.filt.0.05.3, file = paste0(output_data, "ANCOM/ancom_res3_", mtgseq_cut, ".rds"))
# Detected (significant) taxa
ancom.all.filt.0.05.3$detected
## NULL
# No sig
#format metadata
ps2<-subset_samples(filtered_ps003, Within.study.sampling.date == "Timepoint 2" )
metada_ps<-sample_data(ps2)
metada_ps<-as.data.frame(metada_ps)
metada_ps$HostName <- as.character(metada_ps$Host.Name)
metada_ps$phenotype <- as.character(metada_ps$phenotype)
metada_ps<-tibble(metada_ps$HostName, metada_ps$phenotype, rows = rownames(metada_ps))
colnames(metada_ps) <- c("HostName", "phenotype", "Biospecimen.Barcode")
res_table_filt<-otu_table(ps2)
res_table_filt<-as.data.frame(res_table_filt)
prepro<-feature_table_pre_process(feature_table = res_table_filt, meta_data = metada_ps, sample_var = "Biospecimen.Barcode", group_var = "phenotype", out_cut = 0.05, zero_cut = 0.90, lib_cut = 1000, neg_lb = FALSE)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
main_var = "phenotype"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL
ancom.all.filt.0.05.2 <- ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)#, control)
#dir.create(path = "ANCOM")
saveRDS(ancom.all.filt.0.05.2, file=paste0(output_data, "ANCOM/ancom_res2_", mtgseq_cut, ".rds"))
#Format the Detected Table w/ Taxa
ancom.all.filt.0.05.2$detected
## NULL
# No sig
#format metadata
ps1<-subset_samples(filtered_ps003, Within.study.sampling.date == "Timepoint 1" )
metada_ps<-sample_data(ps1)
metada_ps<-as.data.frame(metada_ps)
metada_ps$HostName <- as.character(metada_ps$Host.Name)
metada_ps$phenotype <- as.character(metada_ps$phenotype)
metada_ps<-tibble(metada_ps$HostName, metada_ps$phenotype, rows = rownames(metada_ps))
colnames(metada_ps) <- c("HostName", "phenotype", "Biospecimen.Barcode")
res_table_filt<-otu_table(ps1)
res_table_filt<-as.data.frame(res_table_filt)
prepro<-feature_table_pre_process(feature_table = res_table_filt, meta_data = metada_ps, sample_var = "Biospecimen.Barcode", group_var = "phenotype", out_cut = 0.05, zero_cut = 0.90, lib_cut = 1000, neg_lb = FALSE)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
main_var = "phenotype"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL
ancom.all.filt.0.05.1 <- ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)#, control)
#dir.create(path = "ANCOM")
saveRDS(ancom.all.filt.0.05.1, file=paste0(output_data, "ANCOM/ancom_res1_", mtgseq_cut, ".rds"))
#Format the Detected Table w/ Taxa
ancom.all.filt.0.05.1$detected
## NULL
# No sig
### functions to plot
# Plot normalized counts of selected ASVs by timepoint, colored by group.
# ps_norm: a normalized (DESeq or CSS) phyloseq object
# grouping: column name in sample_data(ps_norm) used for fill color
# tax: vector of taxonomic bin ids (ASVs) present in the counts table
# plot_type: 'box' (boxplot + jitter, log scale) or 'bar' (mean +/- SEM)
make_vis_plots <- function(ps_norm, grouping, tax, plot_type=c('box', 'bar')){
  # BUGFIX: resolve the default c('box','bar') to a single choice. The
  # original compared the length-2 default with '==', which warns (and errors
  # in recent R) whenever plot_type is left unspecified.
  plot_type <- match.arg(plot_type)
  # subset phyloseq object to the ASVs of interest
  ps_filt = prune_taxa(taxa_names(ps_norm) %in% tax, ps_norm)
  # normalized counts, long format (one row per ASV x sample)
  plot_table <- data.table(otu_table(ps_filt), keep.rownames=TRUE)[rn %in% tax]
  plot_table <- melt(plot_table, id.vars='rn')
  # add a tiny pseudo-count (min nonzero / 1e5) so zeros survive log scaling
  plot_table$value <- plot_table$value + min(plot_table[value!=0]$value)/100000
  # attach the grouping variable and the timepoint from the sample metadata
  groupDT = data.table(data.frame(sample_data(ps_filt)[, c(grouping, 'Within.study.sampling.date')]), keep.rownames=TRUE)
  setnames(groupDT, 'rn', 'variable')
  plot_table <- merge(plot_table, groupDT, by='variable', all.x=TRUE)
  # generic column name so aes() below works for any grouping variable
  setnames(plot_table, grouping, 'Group')
  if(plot_type=='box'){
    # boxplot with jittered points, log10 y-axis
    ggplot(data=plot_table, aes(x=Within.study.sampling.date, y = value, fill=Group)) +
      geom_boxplot(outlier.color=NA) +
      geom_jitter(position=position_jitterdodge(0.2), cex=1.5, color="gray44") +
      labs(title =deparse(substitute(ps_norm)), x='', y ='Proportional counts, log scale') +
      scale_y_log10() +
      scale_fill_manual(values=sgColorPalette)+
      theme_minimal() + facet_wrap(~rn, scales='free', ncol=3)+
      theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
  } else if (plot_type=='bar'){
    # mean +/- SEM per group x timepoint x ASV
    plot_table2 <- plot_table[, list(mean_ct=mean(value), sem=sd(value)/sqrt(.N)), by=c('Group', 'Within.study.sampling.date', 'rn')]
    ggplot(data=plot_table2, aes(x=Within.study.sampling.date, y =mean_ct, fill=Group)) +
      geom_bar(stat='identity', position=position_dodge()) +
      geom_errorbar(aes(ymin=mean_ct-sem, ymax=mean_ct+sem), width=0.2, position=position_dodge(0.9))+
      labs(title =deparse(substitute(ps_norm)), x='', y ='Proportional counts, 0 to 1 scale') +
      #scale_y_log10() +
      scale_fill_manual(values=sgColorPalette)+
      theme_minimal() + facet_wrap(~rn, scales='free', ncol=3)+
      theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
  }
}
######BOXPLOT of significant ones
# Collect the adjusted p-values of every DESeq2 / metagenomeSeq run into a
# single wide table keyed by ASV id ('rn'); full outer merges retain every
# ASV that was significant in at least one analysis.
significant_tax=NULL
padj_tables <- list(
  data.table(deseq_res_P1[[2]], keep.rownames=TRUE)[, list(rn, deseq_P1_adjp=padj)],
  data.table(deseq_res_P2[[2]], keep.rownames=TRUE)[, list(rn, deseq_P2_adjp=padj)],
  data.table(deseq_res_P3[[2]], keep.rownames=TRUE)[, list(rn, deseq_P3_adjp=padj)],
  data.table(bla[[2]], keep.rownames=TRUE)[, list(rn, deseq_timeseries_adjp=padj)],
  data.table(zig_res_P1[[2]], keep.rownames=TRUE)[, list(rn, mtgseq_P1_adjp=adjPvalues)],
  data.table(zig_res_P2[[2]], keep.rownames=TRUE)[, list(rn, mtgseq_P2_adjp=adjPvalues)],
  data.table(zig_res_P3[[2]], keep.rownames=TRUE)[, list(rn, mtgseq_P3_adjp=adjPvalues)],
  data.table(zig_res_all[[2]], keep.rownames=TRUE)[, list(rn, mtgseq_timeseries_adjp=adjPvalues)]
)
significant_tax <- Reduce(function(a, b) merge(a, b, by='rn', all=TRUE), padj_tables)
# drop the 'nothing' sentinel rows emitted by runs with no significant taxa
significant_tax <- significant_tax[rn!='nothing']
# write results
write.csv(significant_tax, file=paste0(output_data, 'Significant_res_deseq_q', deseq_cut, '_mtgseq_q', mtgseq_cut, '.csv'), row.names=FALSE)
datatable(significant_tax)
# also, find taxonomical annotations
# NOTE: single ASV may have multiple annotations due to tie hits
#Changing var all_tax_data to tax_table since I don't have this object since I don't have all_tax_data as a object,
datatable(tax_table(ps_not_norm_comp)[rownames(tax_table(ps_not_norm_comp)) %in% significant_tax$rn])
## plot
# Boxplots of normalized counts for each set of significant taxa.
# common by deseq
# taxa significant by DESeq2 in ALL three individual timepoints
com_deseq_taxa=significant_tax[!is.na(deseq_P1_adjp) & !is.na(deseq_P2_adjp) & !is.na(deseq_P3_adjp)]
if(nrow(com_deseq_taxa)>0){
print(make_vis_plots(ps_TSS_norm_pass_min_postDD_sup003, 'phenotype', com_deseq_taxa$rn, 'box'))
} else {
print('no common DESeq significant taxa')
}
## [1] "no common DESeq significant taxa"
# deseq timeseries
# taxa significant in the DESeq2 all-timepoint (timeseries) model
if(nrow(significant_tax[!is.na(deseq_timeseries_adjp)])>0){
print(make_vis_plots(ps_TSS_norm_pass_min_postDD_sup003, 'phenotype', significant_tax[!is.na(deseq_timeseries_adjp)]$rn, 'box'))
# plot bar as well
print(make_vis_plots(ps_TSS_norm_pass_min_postDD_sup003, 'phenotype', significant_tax[!is.na(deseq_timeseries_adjp)]$rn, 'bar'))
} else {
print('no DESeq timeseries significant taxa')
}
# common by metagenomeseq
# taxa significant by metagenomeSeq in ALL three individual timepoints
com_mtgseq_taxa=significant_tax[!is.na(mtgseq_P1_adjp) & !is.na(mtgseq_P2_adjp) & !is.na(mtgseq_P3_adjp)]
if(nrow(com_mtgseq_taxa)>0){
print(make_vis_plots(ps_TSS_norm_pass_min_postDD_sup003, 'phenotype', com_mtgseq_taxa$rn, 'box'))
} else {
print('no common metagenomeSeq significant taxa')
}
### Meta_genome timeseries results
# mtgseq timeseries: plot the significant taxa in pages of up to 6 panels.
# BUGFIX: the original hard-coded index windows 1:6, 7:12, ..., 25:27, which
# produce NA taxa names (and broken plots) whenever the number of significant
# taxa differs from 27. A paged loop handles any count.
mtg_ts_taxa <- significant_tax[!is.na(mtgseq_timeseries_adjp)]$rn
if(length(mtg_ts_taxa) > 0){
  for(page_start in seq(1, length(mtg_ts_taxa), by = 6)){
    page_taxa <- mtg_ts_taxa[page_start:min(page_start + 5, length(mtg_ts_taxa))]
    print(make_vis_plots(ps_TSS_norm_pass_min_postDD_sup003, 'phenotype', page_taxa, 'box'))
    # plot bar as well
    print(make_vis_plots(ps_TSS_norm_pass_min_postDD_sup003, 'phenotype', page_taxa, 'bar'))
  }
} else {
  print('no Mtgseq timeseries significant taxa')
}
Compare resulting amplicon data between and within sample types by canonical correlation analysis, regression profiling, and visualization (e.g. non-metric multidimensional scaling [NMDS], principal coordinates analysis [PCoA], principal component analysis [PCA]).
# Constrained PCoA (CAP, Bray-Curtis) ordinated by phenotype, restricted to
# families that contributed exactly 6 samples; also returns a beta-dispersion
# object computed on the FULL input object, grouped by family.
# ps: normalized phyloseq object; title: label used in the plot title.
# Returns list(ggplot ordination plot, betadisper result).
plotting_phenotype_consPcoA <- function(ps,title){
  complete_families <- names(which(table(sample_data(ps)$Family.group.ID) == 6))
  ps_complete <- prune_samples(sample_data(ps)$Family.group.ID %in% complete_families, ps)
  # CAP ordination constrained on phenotype
  cap_ord <- ordinate(physeq = ps_complete,
                      method = "CAP",
                      distance = "bray",
                      formula = ~ phenotype)
  ord_plot <- plot_ordination(physeq = ps_complete,
                              ordination = cap_ord,
                              color = "phenotype",
                              axes = c(1, 2),
                              title = paste("Constrained PcoA", title, "ordinated by phenotype with all timepoints")) +
    geom_point(size = 2) +
    scale_color_manual(values = sgColorPalette) +
    theme_minimal() +
    theme(text = element_text(size = 10), plot.title = element_text(size = 10))
  # beta-dispersion by family, computed on the unfiltered object
  bray_full <- phyloseq::distance(ps, method = "bray")
  meta_full <- data.frame(sample_data(ps))
  dispersion <- betadisper(bray_full, meta_full$Family.group.ID)
  list(ord_plot, dispersion)
}
#With Deseq
DeSeq_distance<-plotting_phenotype_consPcoA(ps_DeSeq_norm_pass_min_postDD_sup003, "Deseq")
# plot
# [[1]] is the constrained-PCoA ggplot; [[2]] holds the betadisper result
DeSeq_distance[[1]]
#same with CSS
CSS_distance<-plotting_phenotype_consPcoA(ps_CSS_norm_pass_min_postDD_sup003, "CSS")
# plot
CSS_distance[[1]]
#plotting
#Now we have: 803 taxa and 559 samples
# Looking at the families for the complete set of samples
# Keeping the same ordination but filtering to the families with all 6 samples to help visualize the plot
# Comparing the two NORMALIZATION strategies (DESeq vs CSS)
# Constrained PCoA (CAP, Bray-Curtis) ordinated by FAMILY, restricted to
# families that contributed exactly 6 samples; also returns a beta-dispersion
# object computed on the FULL input object, grouped by family.
# ps: normalized phyloseq object; title: label used in the plot title.
# Returns list(ggplot ordination plot, betadisper result).
plotting_Fam_consPcoA <- function(ps,title){
  complete_families <- names(which(table(sample_data(ps)$Family.group.ID) == 6))
  ps_complete <- prune_samples(sample_data(ps)$Family.group.ID %in% complete_families, ps)
  # prefix family ids with 'fam' so they are treated as labels, not numbers
  sample_data(ps_complete)$Family.group.ID <- paste0('fam', as.character(sample_data(ps_complete)$Family.group.ID))
  # CAP ordination constrained on family
  cap_ord <- ordinate(physeq = ps_complete,
                      method = "CAP",
                      distance = "bray",
                      formula = ~ Family.group.ID)
  ord_plot <- plot_ordination(physeq = ps_complete,
                              ordination = cap_ord,
                              color = "Family.group.ID",
                              axes = c(1, 2),
                              title = paste("Constrained PcoA", title, "ordinated by families with all timepoints")) +
    geom_point(size = 2) +
    theme_minimal() +
    # too many families for a readable legend
    theme(text = element_text(size = 10), plot.title = element_text(size = 10), legend.position = 'none')
  # beta-dispersion by family, computed on the unfiltered object
  bray_full <- phyloseq::distance(ps, method = "bray")
  meta_full <- data.frame(sample_data(ps))
  dispersion <- betadisper(bray_full, meta_full$Family.group.ID)
  list(ord_plot, dispersion)
}
#With Deseq
DeSeq_distance<-plotting_Fam_consPcoA(ps_DeSeq_norm_pass_min_postDD_sup003, "Deseq")
# plot
DeSeq_distance[[1]]
#same with CSS
CSS_distance<-plotting_Fam_consPcoA(ps_CSS_norm_pass_min_postDD_sup003, "CSS")
# plot
CSS_distance[[1]]
# How do within-family dispersions compare between the two normalizations?
#the distance in those plot?
#average_distance_to_median
#pdf(file=paste0(output_data, "Figures/Distance_DeSeq_CSS_", Sys.Date(), ".pdf"))
boxplot(DeSeq_distance[[2]]$distances,CSS_distance[[2]]$distances, names=c("DeSeq", "CSS"),
xlab = "Type of Normalization", ylab = "Distance on Component 1 & 2", main ="Intragroup distance for each family")
#dev.off()
Characterize and assess the diversity of each sample, and evaluate the extent of dissimilarity between the two cohorts.
# Alpha diversity (Observed, Chao1, Shannon) on the non-normalized counts
ER <- estimate_richness(ps_not_norm_comp, measures=c("Observed", "Chao1", "Shannon"))
# attach phenotype / family / timepoint metadata, then reshape to long format
ER <- cbind(ER, sample_data(ps_not_norm_comp)[row.names(ER), c("phenotype", "Family.group.ID", "Within.study.sampling.date")])
ER <- data.table(ER, keep.rownames = TRUE)
ER <- melt(ER, id.vars=c('rn', 'phenotype', "Family.group.ID", "Within.study.sampling.date"))
# plot
# se.chao1 is a standard error, not a diversity index, so it is excluded
ggplot(data=ER[variable!='se.chao1'], aes(x=phenotype, y=value, fill=phenotype))+
geom_boxplot(width=0.7, outlier.colour='white')+
geom_jitter(size=1, position=position_jitter(width=0.1))+
xlab('')+ylab('')+
scale_fill_manual(values=sgColorPalette)+
theme_minimal()+facet_wrap(~variable, scales='free')
# run t-test to check significance
# equal-variance two-sample t-test of each index between phenotypes
ttest=NULL
for(alphad in c('Observed', 'Chao1', 'Shannon')){
ttest_res=t.test(value ~ phenotype, data=ER[variable==alphad], var.equal=TRUE)
ttest=rbindlist(list(ttest, data.table(alpha_index=alphad, pvalue=ttest_res$p.value)))
}
pander(ttest)
| alpha_index | pvalue |
|---|---|
| Observed | 0.2772 |
| Chao1 | 0.2772 |
| Shannon | 0.5052 |
#Let's do a PcoA #not much differences
# Unconstrained PCoA (Bray-Curtis) on the DESeq-normalized object,
# colored by phenotype; GP.ord is reused by wo_na_pcoa() further below.
GP.ord <- ordinate(ps_DeSeq_norm_pass_min_postDD_sup003, "PCoA", "bray")
p2 = plot_ordination(ps_DeSeq_norm_pass_min_postDD_sup003, GP.ord, type="samples", color="phenotype") +
geom_point( size = 1)+
scale_color_manual(values=sgColorPalette)+
theme_minimal()
p2
Non-parametric statistical approaches (ANOSIM, ADONIS, ANOVA, PERMANOVA, etc.) will be employed to determine the significance of noteworthy factors, such as digital phenotype and probiotic and/or antibiotic use.
# PERMANOVA (vegan::adonis, Bray-Curtis) for one metadata variable.
# physeq: phyloseq object; factorName: metadata column to test;
# ifnumeric: coerce the column to numeric before testing;
# pmt: number of permutations (default 999).
# Returns a one-row data.table: Variable, FactorClass, TotalN, R2, pvalue.
permanova <- function(physeq, factorName, ifnumeric, pmt=999){
  set.seed(1) # reproducible permutations
  braydist = phyloseq::distance(physeq, "bray")
  # paste0 replaces paste(..., sep = ''); the redundant c() wrapper is dropped
  form <- as.formula(paste0("braydist ~ ", factorName))
  metaDF = data.frame(sample_data(physeq)[, as.character(factorName)])
  # coerce numeric variables so adonis fits a regression, not group contrasts
  if(ifnumeric){
    metaDF[, factorName] <- as.numeric(metaDF[, factorName])
    factor_class = 'numeric'
  } else {
    factor_class = 'categorical'
  }
  perm <- adonis(form, permutations = pmt, metaDF)
  permDT = data.table(Variable = factorName,
                      FactorClass = factor_class,
                      TotalN = perm$aov.tab['Total', 'Df'] + 1,
                      R2 = perm$aov.tab[factorName, 'R2'],
                      pvalue = perm$aov.tab[factorName, 'Pr(>F)'][1])
  return(permDT)
}
#betadispersion
# Screen every candidate metadata variable with permutest(betadisper(...)).
# Variables whose groups differ significantly in dispersion violate the
# PERMANOVA homogeneity assumption; they are collected in to_remove_betadis
# and excluded from the PERMANOVA results later.
#we keep only the cateory selected above as relevant
# NOTE(review): metadata_ok, num_cat and fac_cat are defined earlier in the
# file (not visible in this chunk) — presumably the curated sample metadata
# and its numeric / factor column-name sets; confirm.
tmp_metadat<-metadata_ok[,c(num_cat,fac_cat)]
#additionnal error to remove: not enough sample:
tmp_metadat<-tmp_metadat[,-which(colnames(tmp_metadat) %in% c("Number.of.pet.reptiles","Number.of.pet.horses", "Pet.horse"))]
#additionnal error to remove: filled with only NA or one factor, cant do permutest on it due to adonis function requirements
col_levels<-sapply(tmp_metadat, levels)
col_levelscount<-sapply(col_levels, length)
tmp_metadat_1 <- tmp_metadat
#Since there are no numerics based on code below, will drop all that dont have 2 or more levels
#tmp_metadat[,which(sapply(tmp_metadat, class) == "numeric")]
# numeric columns have NULL levels (count 0) and are therefore dropped here too
tmp_metadat <- tmp_metadat[,which(col_levelscount >= 2)]
set.seed(1)
pval_factors_diper=c()
nb_samples_disper=c()
for (i in 1:length(tmp_metadat)){
#cat (i,"\t")
# keep only samples with a usable (non-NA, non-blank) value for variable i
test_map<-tmp_metadat[!is.na(tmp_metadat[,i]) & tmp_metadat[,i] != "" ,]
ps.tmp<-copy(ps_DeSeq_norm_pass_min_postDD_sup003)
sample_data(ps.tmp) <- test_map
df_metadata <- data.frame(sample_data(ps.tmp))
df_metadata<-df_metadata[df_metadata[,colnames(test_map)[i]] != "",]
# use prune_samples instead of subset_samples
keepid=!is.na(get_variable(ps.tmp, colnames(test_map)[i])) &
get_variable(ps.tmp, colnames(test_map)[i])!='' &
get_variable(ps.tmp, colnames(test_map)[i])!='NA'
ps.tmp <- prune_samples(keepid, ps.tmp)
#ps.tmp <- subset_samples(ps.tmp, colnames(test_map)[i] !="")
tmp_nb_samples<-dim(otu_table(ps.tmp))[2]
OTU_tables_bray <- phyloseq::distance(ps.tmp, method = "bray")
# NOTE(review): group labels come from df_metadata while the distances come
# from the pruned ps.tmp — this assumes both filters retain the same samples
# in the same order; verify for variables containing literal 'NA' strings.
beta <- betadisper(OTU_tables_bray, df_metadata[,colnames(test_map)[i]])
tmp<-permutest(beta)
tmp<-tmp$tab$`Pr(>F)`[1]
pval_factors_diper<-c(pval_factors_diper,tmp)
nb_samples_disper<-c(nb_samples_disper,tmp_nb_samples)}
#correct the p.value
# FDR-adjust across variables; significant dispersion => PERMANOVA unreliable
names(pval_factors_diper)<-colnames(tmp_metadat)
pval_factors_diper<-p.adjust(pval_factors_diper, method = "fdr")
to_remove_betadis<-names(pval_factors_diper)[pval_factors_diper<0.05]
# list of permanova variables
#meta_cat <- tibble(col_levelscount >= 2, colnames(tmp_metadat_1), sapply(tmp_metadat_1, class))
#rownames(meta_cat) <- colnames(tmp_metadat_1)
#colnames(meta_cat) <- c("permanova", "varname", "type")
#meta_cat$type <- gsub("factor", "Categorical", meta_cat$type)
#meta_cat$type <- gsub("numerical", "Continuous", meta_cat$type)
# NOTE(review): meta_cat is assumed to exist already — the code that builds it
# above is commented out; confirm it is created earlier in the file or loaded
# from disk.
#meta_cat file listed phenotype as false for permanova, but I will add it back in)
meta_cat$permanova[which(meta_cat$varname == "phenotype")] <- "Categorical"
permanova_var=meta_cat[which(meta_cat$permanova!=FALSE),]
set.seed(1)
permanova_res=NULL
# One PERMANOVA per retained variable: drop NA/blank samples first, skip
# variables that are left with a single level, and dispatch on the declared
# variable type (Categorical vs Continuous).
for(j in 1:nrow(permanova_var)){
#print(factorName1)
#pander(table(sample_data(ps_DeSeq_norm_pass_min_postDD_sup003)[, factorName1]))
# variable name (as.characteradded)
var_name=as.character(permanova_var$varname[j])
# remove all NAs
keepid=!is.na(get_variable(ps_DeSeq_norm_pass_min_postDD_sup003, var_name)) &
get_variable(ps_DeSeq_norm_pass_min_postDD_sup003, var_name)!='NA' &
get_variable(ps_DeSeq_norm_pass_min_postDD_sup003, var_name)!=''
tmp_ps <- prune_samples(keepid, ps_DeSeq_norm_pass_min_postDD_sup003)
# Check if there is more than 1 values (categories)
if(uniqueN(sample_data(tmp_ps)[, var_name])>1){
# if categorical
if(permanova_var$permanova[j]=='Categorical'){
# run permanova only if there are more than 1 groups
p <- permanova(tmp_ps, factorName=var_name, ifnumeric=FALSE, pmt=999)
permanova_res=rbindlist(list(permanova_res, p))
rm(p)
}
# if continuous
if(permanova_var$permanova[j]=='Continuous'){
p <- permanova(tmp_ps, factorName=var_name, ifnumeric=TRUE, pmt=999)
permanova_res=rbindlist(list(permanova_res, p))
rm(p)
}
}
rm(var_name)
}
# write
write.csv(permanova_res, file=paste0(output_data, 'PERMANOVA.csv'), row.names=FALSE)
# total number of variables tested
uniqueN(permanova_res$Variable)
[1] 123
# Factor class
pander(table(permanova_res$FactorClass))
| categorical | numeric |
|---|---|
| 102 | 21 |
# number of significant variables
uniqueN(permanova_res[pvalue<permanova_pcut]$Variable)
[1] 113
#and now removing the ones with betadispersion significant
impacting_compo<-setdiff(permanova_res[pvalue<permanova_pcut]$Variable, to_remove_betadis)
#and now the ones also significant between the two cohorts
# NOTE(review): `slopesig_p.val` does not match the `slope_sig_asvs_p.val`
# object created later in this file — confirm which name is intended.
impacting_compo<-impacting_compo[impacting_compo %in% c(names(all_chisquare),slopesig_p.val)]
permanova_res<- permanova_res[permanova_res$Variable %in% impacting_compo,]
# sort
permanova_res <- permanova_res[order(R2, decreasing=TRUE)]
datatable(permanova_res)
write.csv(permanova_res, file=paste0(output_data, 'PERMANOV_betadis_imp_corhort.csv'), row.names=FALSE)
# function to plot PCoA, only for higher R2 value
# Keep variables explaining > 1% of variance (R2) for the constrained ordination
imp_factors<-permanova_res$Variable[permanova_res$R2 > 0.01]
imp<-list()
# drop variables with any NA — CAP ordination cannot handle missing values
# NOTE(review): `map` is not defined in this chunk — presumably the sample
# metadata data.frame created earlier in the file; confirm.
for (i in 1:length(imp_factors)){
if(anyNA(map[,imp_factors[i]]) == FALSE){
imp[i] <- imp_factors[i]
}
}
impforpcoa<-unlist(imp)
# build the "var1 + var2 + ..." right-hand side for the ordination formula
add<-paste(impforpcoa, collapse = " + ")
#copy and paste form variable below into formula for pcoa for convenience (not sure why it does not work as an input for formula, but copy/paste as text works)
form<- as.formula(paste0("~ ", add))
#ordination formula only working with one variable in formula...
ps_pcoa <- ordinate(
physeq = ps_DeSeq_norm_pass_min_postDD_sup003,
method = "CAP",
distance = "bray",
#Did not include Toilet.cover and Meat/Seafood Longitudinal, Fruit..consumption.frequency...longitudinal. and LR10.prediction..M3. due to NA missing values which does not allow for ordination
formula = ~Stool.frequency + Number.of.roommates + Number.of.pet.fish +
Age..years. + Number.of.pet.cats + Olive.oil.used.in.cooking..M3. +
Number.of.pet.reptiles + Minimum.time.since.antibiotics +
Vitamin.D..consumption.frequency.)
title_prep<-impforpcoa
to_plot=list()
# one ordination panel per confounder (first four variables) plus a taxa panel
for (i in 1:4){
to_plot[[i]] <- plot_ordination(
physeq = ps_DeSeq_norm_pass_min_postDD_sup003,
ordination = ps_pcoa,
color = title_prep[i],
axes = c(1,2),
title=title_prep[i]
) +
geom_point( size = 0.5) +
theme(text = element_text(size =20), plot.title = element_text(size=15))
}
to_plot[[5]]<-plot_ordination(physeq = ps_DeSeq_norm_pass_min_postDD_sup003, ordination = ps_pcoa, type="taxa",title ="Taxa") + theme(text = element_text(size =15))
# stack the five panels vertically
lay <- rbind(c(1),
c(2),
c(3),
c(4),
c(5))
#pdf(paste0(output_data,"confounding_factors.pdf",width=16,height=40))
grid.arrange(grobs = to_plot, layout_matrix = lay)
top_potential_confounds <- imp_factors
#dev.off()
#Let's have a look at the plot
plot_ordination(physeq = ps_DeSeq_norm_pass_min_postDD_sup003, ordination = ps_pcoa, type="taxa",title ="Taxa") + theme(text = element_text(size =8))
#ok let's try to find the spcies that show some importance in this PCA
# species (taxa) scores from the CAP ordination
taxa.to.select<-vegan::scores(ps_pcoa)$species
#now plot it with no name for visibilty
rownames(taxa.to.select)<-c()
s.arrow(taxa.to.select) #the taxa that influence the most the plots are above 0.25
# taxa with |score| > 0.1 on either constrained axis
taxa.to.select.to.rem<-vegan::scores(ps_pcoa)$species[abs(vegan::scores(ps_pcoa)$species[,1])>0.1 | abs(vegan::scores(ps_pcoa)$species[,2])>0.1,]
#any overlap with the 5 important?
# NOTE(review): this compares names against the score matrix itself, not its
# rownames — rownames(bla[[2]]) %in% rownames(taxa.to.select.to.rem) may be
# what was intended; result happened to be FALSE either way.
rownames(bla[[2]]) %in% taxa.to.select.to.rem #NOPE!
## [1] FALSE FALSE
# Within-family Bray-Curtis distances between a host's three timepoints,
# summed separately for the A and N member of each family.
# BUGFIX: the original seeded `tab` with family "1" outside the loop and then
# iterated over ALL families (including "1"), so family 1 appeared twice in
# the result. The table is now built entirely inside the loop.
tab <- NULL
for(i in unique(ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Family.group.ID)){
  tmpps <- prune_samples((ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Family.group.ID == i), ps_DeSeq_norm_pass_min_postDD_sup003)
  tmppsA <- prune_samples(tmpps@sam_data$phenotype == "A", tmpps)
  tmppsN <- prune_samples(tmpps@sam_data$phenotype == "N", tmpps)
  # pairwise distances among the member's 3 timepoints (3 pairs each)
  A <- distance(tmppsA, "bray", type = "samples")
  total_distanceA = A[1] + A[2] + A[3]
  N <- distance(tmppsN, "bray", type = "samples")
  total_distanceN = N[1] + N[2] + N[3]
  tabtmp <- tibble(total_distanceA, total_distanceN, tmpps@sam_data$Family.group.ID[1])
  colnames(tabtmp) <- c("TotDistanceAut", "TotDistanceNeu", "Family")
  tab <- rbind(tab, tabtmp)
}
#tab
# run tests to check significance
# reshape to long: one row per family member, with its total distance + phenotype
taba<-tibble(tab$TotDistanceAut, rep("A", length(tab$TotDistanceAut)))
colnames(taba) <- c("TotalDistanceDiff_btwnTimepoints","phenotype" )
tabn<-tibble(tab$TotDistanceNeu, rep("N", length(tab$TotDistanceNeu)))
colnames(tabn) <- c("TotalDistanceDiff_btwnTimepoints","phenotype" )
finaltab<-rbind(tabn, taba)
# run tests to check significance
# Shapiro rejects normality, hence the rank-based Wilcoxon test below
shapiro.test(finaltab$TotalDistanceDiff_btwnTimepoints) #not normal we need a reanking test
##
## Shapiro-Wilk normality test
##
## data: finaltab$TotalDistanceDiff_btwnTimepoints
## W = 0.87065, p-value = 3.487e-09
wilcox.test(TotalDistanceDiff_btwnTimepoints ~ phenotype, data=finaltab, var.equal=FALSE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: TotalDistanceDiff_btwnTimepoints by phenotype
## W = 1948, p-value = 0.6354
## alternative hypothesis: true location shift is not equal to 0
# not significantly different by this total distance measure
#doing the same but w/ avg
# Same per-family distance computation, but averaging the three pairwise
# distances instead of summing them.
# BUGFIX: as above, the original duplicated family "1" by seeding `tab`
# before the loop; the table is now built entirely inside the loop.
# ave(c(...))[1] is simply the mean of the three values, written as mean().
tab <- NULL
for(i in unique(ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Family.group.ID)){
  tmpps <- prune_samples((ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Family.group.ID == i), ps_DeSeq_norm_pass_min_postDD_sup003)
  tmppsA <- prune_samples(tmpps@sam_data$phenotype == "A", tmpps)
  tmppsN <- prune_samples(tmpps@sam_data$phenotype == "N", tmpps)
  A <- distance(tmppsA, "bray", type = "samples")
  total_distanceA = mean(c(A[1], A[2], A[3]))
  N <- distance(tmppsN, "bray", type = "samples")
  total_distanceN = mean(c(N[1], N[2], N[3]))
  tabtmp <- tibble(total_distanceA, total_distanceN, tmpps@sam_data$Family.group.ID[1])
  colnames(tabtmp) <- c("AvgDistanceAut", "AvgDistanceNeu", "Family")
  tab <- rbind(tab, tabtmp)
}
#tab
# run tests to check significance
# reshape to long: one row per family member, with its average distance + phenotype
taba<-tibble(tab$AvgDistanceAut, rep("A", length(tab$AvgDistanceAut)))
colnames(taba) <- c("AvgDistanceDiff_btwnTimepoints","phenotype" )
tabn<-tibble(tab$AvgDistanceNeu, rep("N", length(tab$AvgDistanceNeu)))
colnames(tabn) <- c("AvgDistanceDiff_btwnTimepoints","phenotype" )
finaltab2<-rbind(tabn, taba)
# run tests to check significance
# Shapiro rejects normality, hence the rank-based Wilcoxon test below
shapiro.test(finaltab2$AvgDistanceDiff_btwnTimepoints) #not normal we need a reanking test
##
## Shapiro-Wilk normality test
##
## data: finaltab2$AvgDistanceDiff_btwnTimepoints
## W = 0.87065, p-value = 3.487e-09
wilcox.test(AvgDistanceDiff_btwnTimepoints ~ phenotype, data=finaltab2, var.equal=FALSE)
##
## Wilcoxon rank sum test with continuity correction
##
## data: AvgDistanceDiff_btwnTimepoints by phenotype
## W = 1948, p-value = 0.6354
## alternative hypothesis: true location shift is not equal to 0
#still not significant
# function to plot PCoA without NA points
# PCoA scatter plot for one metadata variable, excluding NA/blank samples.
# ps: phyloseq object; pvar: metadata column name; ord_res: a PCoA ordination
# object (must carry $vectors and $values$Eigenvalues).
wo_na_pcoa <- function(ps, pvar, ord_res){
  # keep only samples with a usable value for pvar
  vals <- get_variable(ps, pvar)
  usable <- !is.na(vals) & vals != 'NA' & vals != ''
  ps_sub <- prune_samples(usable, ps)
  # first two ordination axes joined with the variable of interest
  DF <- cbind(ord_res$vectors[row.names(sample_data(ps_sub)), 1:2], sample_data(ps_sub)[, pvar])
  setnames(DF, pvar, 'testvar')
  # percentage of total variance captured by each plotted axis
  eig <- (ord_res$values$Eigenvalues / sum(ord_res$values$Eigenvalues))[1:2] * 100
  p <- ggplot(data=DF, aes(x=Axis.1, y=Axis.2, colour=testvar)) +
    geom_point(size=2) +
    ggtitle(pvar) +
    xlab(paste0('Axis.1 [', format(eig[1], digits=3), '%]')) +
    ylab(paste0('Axis.2 [', format(eig[2], digits=3), '%]')) +
    theme_minimal() +
    theme(legend.title=element_blank(), legend.position="bottom")
  print(p)
}
#Hard to find a confounding variable in impfactors that does not have a lot of NAs (no NAs required for DESEQ2)
# DESeq2 A-vs-N test adjusted for a candidate confounder (red meat
# consumption frequency) and timepoint.
# BUGFIX: the original design formula omitted `phenotype`, but
# results(..., contrast = c("phenotype", "A", "N")) requires phenotype to be
# a term of the design, so DESeq2 would error. phenotype is added as the last
# term, making the A-vs-N contrast adjusted for the other covariates.
# ps: phyloseq object; dcut: adjusted-p cutoff for the significant table.
# Returns list(full results, significant subset — or a 'nothing' sentinel).
runDESeq_time_confound <- function(ps, dcut){
  diagdds = phyloseq_to_deseq2(ps, ~ Red.meat..consumption.frequency. + Within.study.sampling.date + phenotype)
  # poscounts handles the zero-heavy microbiome count matrix
  diagdds <- estimateSizeFactors(diagdds, type = "poscounts")
  diagdds <- DESeq(diagdds, fitType = "parametric", betaPrior = FALSE)
  #resultsNames(diagdds): to determine the contrast
  res = results(diagdds, contrast = c("phenotype", "A", "N"))
  # treat untestable taxa (NA padj) as non-significant
  res$padj[is.na(res$padj)] = 1
  sig <- res[res$padj < dcut, ]
  if (dim(sig)[1] == 0){
    # sentinel row keeps downstream merges simple when nothing is significant
    sigtab <- as.data.frame(1, row.names = "nothing")
    colnames(sigtab) <- 'padj'
  } else {
    sigtab <- data.frame(sig)
  }
  return(list(res, sigtab))
}
# metagenomeSeq zero-inflated Gaussian (fitZig) differential-abundance test,
# modelling a candidate confounder (most recent GI episode symptoms) plus
# timepoint. ps: phyloseq object; maxit: fitZig iterations; mcut: adjusted-p
# cutoff. Returns list(unfiltered MRtable, filtered significant table).
run_metagenom_seq_confound<-function(ps,maxit, mcut){
p_metag<-phyloseq_to_metagenomeSeq(ps)
#filtering at least 4 samples
# CSS normalization at the 75th percentile, then log-scaled norm factors
p_metag= cumNorm(p_metag, p=0.75)
normFactor =normFactors(p_metag)
normFactor =log2(normFactor/median(normFactor) + 1)
#mod = model.matrix(~ASDorNeuroT +PairASD+ normFactor)
mod = model.matrix(~Most.recent.GI.episode.symptoms..M3. + Within.study.sampling.date +normFactor, data = pData(p_metag))
settings =zigControl(maxit =maxit, verbose =FALSE)
#settings =zigControl(tol = 1e-5, maxit = 30, verbose = TRUE, pvalMethod = 'bootstrap')
# normFactor is already a model term, so the CSS offset is disabled
fit =fitZig(obj = p_metag, mod = mod, useCSSoffset = FALSE, control = settings)
#Note: changed fit$taxa to fit@taxa in light of error (probably from newer metagenomeseq ver.)
res_fit<-MRtable(fit, number = length(fit@taxa))
res_fit_nonfiltered <- copy(res_fit)
res_fit<-res_fit[res_fit$adjPvalues<mcut,]
#finally remove the ones that are not with enough samples
#mean_sample<-mean(calculateEffectiveSamples(fit))
#res_fit<-res_fit[res_fit$`counts in group 0` & res_fit$`counts in group 1` > mean_sample,]
Min_effec_samp<-calculateEffectiveSamples(fit)
# NOTE(review): Min_effec_samp is subset BEFORE the NA-name rows are dropped
# from res_fit below, so the two vectors can end up different lengths /
# misaligned when assigned to res_fit$Min_sample — likely the flagged bug.
Min_effec_samp<-Min_effec_samp[ names(Min_effec_samp) %in% rownames(res_fit)] #####there is a bug here
#manually removing the ones with "NA"
res_fit<-res_fit[grep("NA",rownames(res_fit), inv=T),]
res_fit$Min_sample<-Min_effec_samp
# keep taxa detected in at least the effective sample count in both groups
res_fit<-res_fit[res_fit$`+samples in group 0` >= Min_effec_samp & res_fit$`+samples in group 1` >= Min_effec_samp,]
return(list(res_fit_nonfiltered, res_fit))
}
#deseq_res<-runDESeq_time_confound(filtered_ps003, deseq_cut)
#Slope test between taxa and possible confounding variables that are ordinal
# Slope test of every ASV against red-meat consumption frequency.
# BUGFIX/consistency: the original tested the FIRST ASV without `method = 0`
# while all remaining ASVs used `method = 0`; every ASV now uses identical
# settings. Results are collected in a pre-built list (do.call/rbind) instead
# of growing a tibble with rbind() inside the loop.
otus <- as.data.frame(otu_table(ps_DeSeq_norm_pass_min_postDD_sup003))
otus <- t(otus) # samples in rows, ASVs in columns
red_meat <- ps_DeSeq_norm_pass_min_postDD_sup003@sam_data$Red.meat..consumption.frequency.
rows <- vector("list", ncol(otus))
for (i in seq_len(ncol(otus))){
  tmp <- slope.test(otus[, i], red_meat, method = 0)
  rows[[i]] <- tibble(colnames(otus)[i], tmp$p)
}
slopetesttab <- do.call(rbind, rows)
colnames(slopetesttab) <- c("otu", "p_val")
# default p.adjust method is "holm"
slope_p.val <- p.adjust(slopetesttab$p_val)
slope_sig_asvs_p.val <- slopetesttab$otu[slope_p.val < 0.05]
#slope_sig_asvs_p.val
#not confident that these taxa are truly significant with red meat since so many NAs in data
# --- PCoA ordination plots colored by candidate confounders -----------------
# wo_na_pcoa() (project helper defined earlier in this file) is assumed to
# drop samples with NA in the given metadata column before plotting the
# precomputed ordination GP.ord -- TODO confirm against its definition.
# MARA score is stored as text; coerce to numeric so it plots as a gradient.
# Note: sample_data<- mutates the shared phyloseq object, so the coercion
# also applies to any later plots of the same column (e.g. the loop below).
sample_data(ps_DeSeq_norm_pass_min_postDD_sup003)$Mobile.Autism.Risk.Assessment.Score <- as.numeric(sample_data(ps_DeSeq_norm_pass_min_postDD_sup003)$Mobile.Autism.Risk.Assessment.Score)
wo_na_pcoa(ps_DeSeq_norm_pass_min_postDD_sup003, 'Mobile.Autism.Risk.Assessment.Score', GP.ord)
wo_na_pcoa(ps_DeSeq_norm_pass_min_postDD_sup003, 'Probiotic..consumption.frequency.', GP.ord)
# Anti-infective medication use
wo_na_pcoa(ps_DeSeq_norm_pass_min_postDD_sup003, 'Anti.infective', GP.ord)
# Time since last antibiotics course; also stored as text, coerce to numeric.
sample_data(ps_DeSeq_norm_pass_min_postDD_sup003)$Minimum.time.since.antibiotics <- as.numeric(sample_data(ps_DeSeq_norm_pass_min_postDD_sup003)$Minimum.time.since.antibiotics)
wo_na_pcoa(ps_DeSeq_norm_pass_min_postDD_sup003, 'Minimum.time.since.antibiotics', GP.ord)
# Additionally plot every variable that cleared the PERMANOVA effect-size
# (R2 > permanova_cut) and significance (p < permanova_pcut) thresholds;
# permanova_res is a data.table, hence the unquoted-column filter syntax.
for(pvar in permanova_res[R2>permanova_cut & pvalue<permanova_pcut]$Variable){
wo_na_pcoa(ps_DeSeq_norm_pass_min_postDD_sup003, pvar, GP.ord)
}
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS Linux 8 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] phyloseq_1.30.0 biomformat_1.14.0
## [3] DESeq2_1.26.0 SummarizedExperiment_1.16.1
## [5] DelayedArray_0.12.3 BiocParallel_1.20.1
## [7] matrixStats_0.56.0 GenomicRanges_1.38.0
## [9] GenomeInfoDb_1.22.1 IRanges_2.20.2
## [11] S4Vectors_0.24.4 metagenomeSeq_1.28.2
## [13] RColorBrewer_1.1-2 glmnet_4.0-2
## [15] Matrix_1.2-18 limma_3.42.2
## [17] Biobase_2.46.0 BiocGenerics_0.32.0
## [19] vegan_2.5-6 lattice_0.20-40
## [21] permute_0.9-5 compositions_2.0-0
## [23] nlme_3.1-145 exactRankTests_0.8-31
## [25] smatr_3.4-8 ancom.R_1.1-3
## [27] doParallel_1.0.15 iterators_1.0.12
## [29] foreach_1.4.8 adegraphics_1.0-15
## [31] gridExtra_2.3 DT_0.14
## [33] pander_0.6.3 ggplot2_3.3.0
## [35] dplyr_0.8.5 reshape2_1.4.3
## [37] tidyr_1.0.2 knitr_1.28
## [39] devtools_2.2.2 usethis_1.5.1
## [41] data.table_1.12.8 tibble_3.0.2
## [43] shiny_1.5.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.0.0 RSQLite_2.2.0 AnnotationDbi_1.48.0
## [4] htmlwidgets_1.5.1 grid_3.6.2 IHW_1.14.0
## [7] munsell_0.5.0 codetools_0.2-16 withr_2.1.2
## [10] colorspace_1.4-1 rstudioapi_0.11 robustbase_0.93-5
## [13] bayesm_3.1-4 labeling_0.3 slam_0.1-47
## [16] GenomeInfoDbData_1.2.2 lpsymphony_1.16.0 bit64_0.9-7
## [19] farver_2.0.3 rhdf5_2.30.1 rprojroot_1.3-2
## [22] vctrs_0.3.1 TH.data_1.0-10 xfun_0.12
## [25] R6_2.4.1 locfit_1.5-9.4 bitops_1.0-6
## [28] assertthat_0.2.1 promises_1.1.1 scales_1.1.0
## [31] multcomp_1.4-13 nnet_7.3-13 gtable_0.3.0
## [34] processx_3.4.2 sandwich_2.5-1 rlang_0.4.6
## [37] genefilter_1.68.0 splines_3.6.2 acepack_1.4.1
## [40] checkmate_2.0.0 yaml_2.2.1 crosstalk_1.1.0.1
## [43] backports_1.1.5 httpuv_1.5.4 Hmisc_4.4-0
## [46] tensorA_0.36.1 tools_3.6.2 ellipsis_0.3.0
## [49] gplots_3.0.4 sessioninfo_1.1.1 Rcpp_1.0.4
## [52] plyr_1.8.6 base64enc_0.1-3 zlibbioc_1.32.0
## [55] purrr_0.3.3 RCurl_1.98-1.2 ps_1.3.2
## [58] prettyunits_1.1.1 rpart_4.1-15 Wrench_1.4.0
## [61] zoo_1.8-7 cluster_2.1.0 fs_1.3.2
## [64] magrittr_1.5 mvtnorm_1.1-0 pkgload_1.0.2
## [67] mime_0.9 evaluate_0.14 xtable_1.8-4
## [70] XML_3.99-0.3 jpeg_0.1-8.1 shape_1.4.4
## [73] testthat_2.3.2 compiler_3.6.2 KernSmooth_2.23-16
## [76] crayon_1.3.4 htmltools_0.5.0 mgcv_1.8-31
## [79] later_1.1.0.1 Formula_1.2-3 geneplotter_1.64.0
## [82] libcoin_1.0-5 DBI_1.1.0 MASS_7.3-51.5
## [85] ade4_1.7-15 cli_2.0.2 gdata_2.18.0
## [88] igraph_1.2.4.2 pkgconfig_2.0.3 coin_1.3-1
## [91] foreign_0.8-76 sp_1.4-1 annotate_1.64.0
## [94] multtest_2.42.0 XVector_0.26.0 stringr_1.4.0
## [97] callr_3.4.2 digest_0.6.25 Biostrings_2.54.0
## [100] rmarkdown_2.1 htmlTable_1.13.3 gtools_3.8.1
## [103] modeltools_0.2-23 lifecycle_0.2.0 jsonlite_1.6.1
## [106] Rhdf5lib_1.8.0 desc_1.2.0 fansi_0.4.1
## [109] pillar_1.4.3 fastmap_1.0.1 DEoptimR_1.0-8
## [112] pkgbuild_1.0.6 survival_3.2-3 glue_1.3.2
## [115] remotes_2.1.1 fdrtool_1.2.15 png_0.1-7
## [118] bit_1.1-15.2 stringi_1.4.6 blob_1.2.1
## [121] latticeExtra_0.6-29 caTools_1.18.0 memoise_1.1.0
## [124] ape_5.4